10 files changed, 2300 insertions, 0 deletions
diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am
new file mode 100644
index 0000000..fc0e014
--- /dev/null
+++ b/daemons/pacemakerd/Makefile.am
@@ -0,0 +1,37 @@
+#
+# Copyright 2004-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+sbin_PROGRAMS		= pacemakerd
+
+if BUILD_SYSTEMD
+systemdsystemunit_DATA	= pacemaker.service
+endif
+
+EXTRA_DIST 	= pacemakerd.8.inc
+
+## SOURCES
+
+noinst_HEADERS		= pacemakerd.h
+
+pacemakerd_CFLAGS	= $(CFLAGS_HARDENED_EXE)
+pacemakerd_LDFLAGS	= $(LDFLAGS_HARDENED_EXE)
+
+pacemakerd_LDADD	= $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la
+pacemakerd_LDADD	+= $(CLUSTERLIBS)
+pacemakerd_SOURCES	= pacemakerd.c
+if BUILD_CS_SUPPORT
+pacemakerd_SOURCES	+= pcmkd_corosync.c
+endif
+pacemakerd_SOURCES	+= pcmkd_messages.c
+pacemakerd_SOURCES	+= pcmkd_subdaemons.c
+
+CLEANFILES = $(man8_MANS)
diff --git a/daemons/pacemakerd/pacemaker.combined.upstart.in b/daemons/pacemakerd/pacemaker.combined.upstart.in
new file mode 100644
index 0000000..af59ff0
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.combined.upstart.in
@@ -0,0 +1,67 @@
+# pacemaker-corosync - High-Availability cluster
+#
+# Starts Corosync cluster engine and Pacemaker cluster manager.
+
+# if you use automatic start, uncomment the line below.
+#start on started local and runlevel [2345]
+
+stop on runlevel [0123456]
+kill timeout 3600
+respawn
+
+env prog=pacemakerd
+env sysconf=@CONFIGDIR@/pacemaker
+env rpm_lockdir=@localstatedir@/lock/subsys
+env deb_lockdir=@localstatedir@/lock
+
+script
+    [ -f "$sysconf" ] && . "$sysconf"
+    exec $prog
+end script
+
+pre-start script
+    pidof corosync || start corosync
+
+    # if you use corosync-notifyd, uncomment the line below.
+    #start corosync-notifyd
+
+    # give it time to fail.
+    sleep 2
+    pidof corosync || { exit 1; }
+
+    # if you use crm_mon, uncomment the line below.
+    #start crm_mon
+end script
+
+post-start script
+    [ -f "$sysconf" ] && . "$sysconf"
+    [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+    [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+    touch "$LOCK_FILE"
+    pidof $prog > "@localstatedir@/run/$prog.pid"
+end script
+
+post-stop script
+    [ -f "$sysconf" ] && . "$sysconf"
+    [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+    [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+    rm -f "$LOCK_FILE"
+    rm -f "@localstatedir@/run/$prog.pid"
+
+    # if you use corosync-notifyd, uncomment the line below.
+    #stop corosync-notifyd || true
+
+    # if you use watchdog of corosync, uncomment the line below.
+    #pidof corosync || false
+
+    pidof pacemaker-controld || stop corosync
+
+    # if you want the corosync watchdog to reboot the machine when
+    # pacemakerd disappears unexpectedly, uncomment the line below
+    # and remove the "respawn" stanza above.
+    #pidof pacemaker-controld && killall -q -9 corosync
+
+    # if you use crm_mon, uncomment the line below.
+    #stop crm_mon
+
+end script
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
new file mode 100644
index 0000000..3fd53d9
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.service.in
@@ -0,0 +1,103 @@
+[Unit]
+Description=Pacemaker High Availability Cluster Manager
+Documentation=man:pacemakerd
+Documentation=https://clusterlabs.org/pacemaker/doc/
+
+# DefaultDependencies takes care of sysinit.target,
+# basic.target, and shutdown.target
+
+# We need networking to bind to a network address. It is recommended not to
+# use Wants or Requires with network.target, and not to use
+# network-online.target for server daemons.
+After=network.target
+
+# Time syncs can make the clock jump backward, which messes with logging
+# and failure timestamps, so wait until it's done.
+After=time-sync.target
+
+# Managing systemd resources requires DBus.
+After=dbus.service
+Wants=dbus.service
+
+# Some OCF resources may have dependencies that aren't managed by the cluster;
+# these must be started before Pacemaker and stopped after it. The
+# resource-agents package provides this target, which lets system administrators
+# add drop-ins for those dependencies.
+After=resource-agents-deps.target
+Wants=resource-agents-deps.target
+
+After=syslog.service
+After=rsyslog.service
+After=corosync.service
+Requires=corosync.service
+
+# If Pacemaker respawns repeatedly, give up after this many tries in this time
+StartLimitBurst=5
+StartLimitIntervalSec=25s
+
+[Install]
+WantedBy=multi-user.target
+
+
+[Service]
+Type=simple
+KillMode=process
+NotifyAccess=main
+EnvironmentFile=-@CONFIGDIR@/pacemaker
+EnvironmentFile=-@CONFIGDIR@/sbd
+SuccessExitStatus=100
+
+ExecStart=@sbindir@/pacemakerd
+
+# Systemd v227 and above can limit the number of processes spawned by a
+# service. That is a bad idea for an HA cluster resource manager, so disable it
+# by default. The administrator can create a local override if they really want
+# a limit. If your systemd version does not support TasksMax, and you want to
+# get rid of the resulting log warnings, comment out this option.
+TasksMax=infinity
+
+# If pacemakerd doesn't stop, it's probably waiting on a cluster
+# resource.  Sending -KILL will just get the node fenced
+SendSIGKILL=no
+
+# Systemd's default of respawning a failed service after 100ms is too aggressive
+RestartSec=1s
+
+# If we ever hit the StartLimitIntervalSec/StartLimitBurst limit, and the
+# admin wants to stop the cluster while pacemakerd is not running, it
+# might be a good idea to enable the ExecStopPost directive below.
+#
+# However, the node will likely end up being fenced as a result, so it's
+# not enabled by default.
+#
+# ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd pacemaker-based \
+#              pacemaker-controld pacemaker-execd pacemaker-fenced \
+#              pacemaker-schedulerd
+
+# If you want Corosync to stop whenever Pacemaker is stopped,
+# uncomment the next line too:
+#
+# ExecStopPost=/bin/sh -c 'pidof pacemaker-controld || killall -TERM corosync'
+
+# Pacemaker will restart along with Corosync if Corosync is stopped while
+# Pacemaker is running.
+# If you would rather have the node fenced in this case than have Pacemaker
+# restart, uncomment the ExecStopPost directive below.
+# 
+# ExecStopPost=/bin/sh -c 'pidof corosync || \
+#              /usr/bin/systemctl --no-block stop pacemaker'
+
+# When the service functions properly, it will wait to exit until all resources
+# have been stopped on the local node, and potentially across all nodes that
+# are shutting down.  The default of 30min should cover most typical cluster
+# configurations, but it may need an increase to adapt to local conditions
+# (e.g. a large, clustered database could conceivably take longer to stop).
+TimeoutStopSec=30min
+TimeoutStartSec=60s
+
+# Restart options include: no, on-success, on-failure, on-abort or always
+Restart=on-failure
+
+# crm_perror() writes directly to stderr, so ignore it here
+# to avoid double-logging with the wrong format
+StandardError=null
diff --git a/daemons/pacemakerd/pacemaker.upstart.in b/daemons/pacemakerd/pacemaker.upstart.in
new file mode 100644
index 0000000..7a54bc0
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.upstart.in
@@ -0,0 +1,33 @@
+# pacemaker - High-Availability cluster resource manager
+#
+# Starts pacemakerd
+
+stop on runlevel [0123456]
+kill timeout 3600
+respawn
+
+env prog=pacemakerd
+env sysconf=@CONFIGDIR@/pacemaker
+env rpm_lockdir=@localstatedir@/lock/subsys
+env deb_lockdir=@localstatedir@/lock
+
+script
+    [ -f "$sysconf" ] && . "$sysconf"
+    exec $prog
+end script
+
+post-start script
+    [ -f "$sysconf" ] && . "$sysconf"
+    [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+    [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+    touch "$LOCK_FILE"
+    pidof $prog > "@localstatedir@/run/$prog.pid"
+end script
+
+post-stop script
+    [ -f "$sysconf" ] && . "$sysconf"
+    [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+    [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+    rm -f "$LOCK_FILE"
+    rm -f "@localstatedir@/run/$prog.pid"
+end script
diff --git a/daemons/pacemakerd/pacemakerd.8.inc b/daemons/pacemakerd/pacemakerd.8.inc
new file mode 100644
index 0000000..902af4e
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.8.inc
@@ -0,0 +1,5 @@
+[synopsis]
+pacemakerd [options]
+
+/subsidiary Pacemaker daemons/
+.SH OPTIONS
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
new file mode 100644
index 0000000..9f77ccc
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2010-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <pwd.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <crm/crm.h>  /* indirectly: CRM_EX_* */
+#include <crm/msg_xml.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc_pacemakerd.h>
+#include <crm/common/output_internal.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster.h>
+
+#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons"
+
+// Values of pacemakerd's own command-line options (filled in via entries[])
+struct {
+    gboolean features;      // --features/-F: print version and feature list
+    gboolean foreground;    // --foreground/-f: accepted but ignored
+    gboolean shutdown;      // --shutdown/-S: ask a running instance to stop
+    gboolean standby;       // --standby/-s: set by standby_cb()
+} options;
+
+// Output object used for command-line output; main() frees it and resets it
+// to NULL before the daemon proper starts
+static pcmk__output_t *out = NULL;
+
+// Output formats registered by main(); terminated by an all-NULL entry
+static pcmk__supported_format_t formats[] = {
+    PCMK__SUPPORTED_FORMAT_NONE,
+    PCMK__SUPPORTED_FORMAT_TEXT,
+    PCMK__SUPPORTED_FORMAT_XML,
+    { NULL, NULL, NULL }
+};
+
+/* Default handler for the "features" message: emit the Pacemaker version,
+ * build identifier, feature set and compiled-in feature list via out->info().
+ * The va_list is not consulted. Always returns pcmk_rc_ok.
+ */
+PCMK__OUTPUT_ARGS("features")
+static int
+pacemakerd_features(pcmk__output_t *out, va_list args)
+{
+    out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s",
+              PACEMAKER_VERSION, BUILD_VERSION,
+              CRM_FEATURE_SET, CRM_FEATURES);
+
+    return pcmk_rc_ok;
+}
+
+/* XML handler for the "features" message: create a "pacemakerd" parent element
+ * carrying version, build and feature_set attributes, then add one "feature"
+ * text node per space-separated word of CRM_FEATURES inside a "features" list.
+ * The va_list is not consulted. Always returns pcmk_rc_ok.
+ */
+PCMK__OUTPUT_ARGS("features")
+static int
+pacemakerd_features_xml(pcmk__output_t *out, va_list args)
+{
+    gchar **words = g_strsplit(CRM_FEATURES, " ", 0);
+    int idx = 0;
+
+    pcmk__output_xml_create_parent(out, "pacemakerd",
+                                   "version", PACEMAKER_VERSION,
+                                   "build", BUILD_VERSION,
+                                   "feature_set", CRM_FEATURE_SET,
+                                   NULL);
+
+    out->begin_list(out, NULL, NULL, "features");
+    while (words[idx] != NULL) {
+        pcmk__output_create_xml_text_node(out, "feature", words[idx]);
+        idx++;
+    }
+    out->end_list(out);
+
+    g_strfreev(words);
+    return pcmk_rc_ok;
+}
+
+// Handlers for the "features" message: one default handler and one for XML
+// output. Registered by main(); terminated by an all-NULL entry.
+static pcmk__message_entry_t fmt_functions[] = {
+    { "features", "default", pacemakerd_features },
+    { "features", "xml", pacemakerd_features_xml },
+
+    { NULL, NULL, NULL }
+};
+
+/* Option callback for --pid-file: the option is accepted but ignored, so the
+ * argument is discarded and parsing always succeeds.
+ */
+static gboolean
+pid_cb(const gchar *option_name, const gchar *optarg, gpointer data,
+       GError **err)
+{
+    return TRUE;
+}
+
+/* Option callback for --standby: record the request in options.standby and
+ * set the "node_start_state" environment option to "standby".
+ * Always returns TRUE.
+ */
+static gboolean
+standby_cb(const gchar *option_name, const gchar *optarg, gpointer data,
+           GError **err)
+{
+    options.standby = TRUE;
+    pcmk__set_env_option("node_start_state", "standby");
+
+    return TRUE;
+}
+
+// Command-line options specific to pacemakerd, added to the option context by
+// build_arg_context(). --foreground and --pid-file are accepted but ignored.
+static GOptionEntry entries[] = {
+    { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features,
+      "Display full version and list of features Pacemaker was built with",
+      NULL },
+    { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground,
+      "(Ignored) Pacemaker always runs in the foreground",
+      NULL },
+    { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb,
+      "(Ignored) Daemon pid file location",
+      "FILE" },
+    { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown,
+      "Instruct Pacemaker to shutdown on this machine",
+      NULL },
+    { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb,
+      "Start node in standby state",
+      NULL },
+
+    { NULL }
+};
+
+/* Signal handler (installed for SIGHUP by main()) that only logs the signal's
+ * name and number at info level.
+ */
+static void
+pcmk_ignore(int nsig)
+{
+    const char *signame = strsignal(nsig);
+
+    crm_info("Ignoring signal %s (%d)", signame, nsig);
+}
+
+/* Signal handler (installed for SIGQUIT by main()): invoke pcmk__panic(),
+ * passing this function's name as the origin.
+ */
+static void
+pcmk_sigquit(int nsig)
+{
+    const char *origin = __func__;
+
+    pcmk__panic(origin);
+}
+
+/* Change the ownership of path to uid:gid, logging a warning on failure.
+ *
+ * Failure is not fatal; the caller continues regardless. The gid is widened
+ * to long long before printing because gid_t is not guaranteed to be an int,
+ * and errno is captured immediately so the logging machinery cannot clobber
+ * it before it is formatted.
+ */
+static void
+mcp_chown(const char *path, uid_t uid, gid_t gid)
+{
+    if (chown(path, uid, gid) < 0) {
+        int saved_errno = errno;
+
+        crm_warn("Cannot change the ownership of %s to user %s and gid %lld: %s",
+                 path, CRM_DAEMON_USER, (long long) gid,
+                 pcmk_rc_str(saved_errno));
+    }
+}
+
+/* Create the directories Pacemaker needs and hand them to the daemon user.
+ *
+ * Exits with CRM_EX_NOUSER if the daemon user cannot be looked up. Failure to
+ * create an individual directory is only logged as a warning.
+ */
+static void
+create_pcmk_dirs(void)
+{
+    const char *dirs[] = {
+        CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files
+        CRM_CORE_DIR,      // core files
+        CRM_BLACKBOX_DIR,  // blackbox dumps
+        PE_STATE_DIR,      // scheduler inputs
+        CRM_CONFIG_DIR,    // the Cluster Information Base (CIB)
+        // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it
+        NULL
+    };
+    uid_t daemon_uid = 0;
+    gid_t daemon_gid = 0;
+
+    if (pcmk_daemon_user(&daemon_uid, &daemon_gid) < 0) {
+        crm_err("Cluster user %s does not exist, aborting Pacemaker startup",
+                CRM_DAEMON_USER);
+        crm_exit(CRM_EX_NOUSER);
+    }
+
+    // Used by some resource agents; an already-existing directory is fine
+    if ((mkdir(CRM_STATE_DIR, 0750) >= 0) || (errno == EEXIST)) {
+        mcp_chown(CRM_STATE_DIR, daemon_uid, daemon_gid);
+    } else {
+        crm_warn("Could not create directory " CRM_STATE_DIR ": %s",
+                 pcmk_rc_str(errno));
+    }
+
+    for (const char **dir = dirs; *dir != NULL; dir++) {
+        int rc = pcmk__build_path(*dir, 0750);
+
+        if (rc == pcmk_rc_ok) {
+            mcp_chown(*dir, daemon_uid, daemon_gid);
+            continue;
+        }
+        crm_warn("Could not create directory %s: %s",
+                 *dir, pcmk_rc_str(rc));
+    }
+}
+
+/* Raise the soft core-file size limit to the hard limit.
+ *
+ * If the hard limit is zero (core dumps disabled), it is lifted to unlimited
+ * when running with effective uid 0; otherwise a notice is logged and nothing
+ * is changed. Every failure is logged and otherwise ignored.
+ */
+static void
+remove_core_file_limit(void)
+{
+    struct rlimit limits;
+
+    // Get current limits
+    if (getrlimit(RLIMIT_CORE, &limits) < 0) {
+        crm_notice("Unable to check system core file limits "
+                   "(consider ensuring the size is unlimited): %s",
+                   strerror(errno));
+        return;
+    }
+
+    // Core dumps disabled, and only root can do something about that
+    if ((limits.rlim_max == 0) && (geteuid() != 0)) {
+        crm_notice("Core dumps are disabled (consider enabling them)");
+        return;
+    }
+
+    // Core dumps disabled, but we're root, so enable them
+    if (limits.rlim_max == 0) {
+        limits.rlim_max = RLIM_INFINITY;
+    }
+
+    // Raise soft limit to hard limit (if not already done)
+    if (limits.rlim_cur != limits.rlim_max) {
+        limits.rlim_cur = limits.rlim_max;
+
+        if (setrlimit(RLIMIT_CORE, &limits) < 0) {
+            crm_notice("Unable to raise system limit on core file size "
+                       "(consider doing so manually): %s",
+                       strerror(errno));
+            return;
+        }
+    }
+
+    if (limits.rlim_cur != RLIM_INFINITY) {
+        crm_trace("Core file size is limited to %llu bytes",
+                  (unsigned long long) limits.rlim_cur);
+    } else {
+        crm_trace("Core file size is unlimited");
+    }
+}
+
+/* IPC event callback registered by main() on the connection to an existing
+ * pacemakerd instance.
+ *
+ * Events other than replies are ignored. For a reply, an error is reported via
+ * the global output object if the status is not CRM_EX_OK, or if the reply is
+ * not a shutdown reply.
+ */
+static void
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
+                    void *event_data, void *user_data)
+{
+    const pcmk_pacemakerd_api_reply_t *response = event_data;
+
+    if (event_type != pcmk_ipc_event_reply) {
+        return;
+    }
+
+    if (status != CRM_EX_OK) {
+        out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status));
+        return;
+    }
+
+    if (response->reply_type == pcmk_pacemakerd_reply_shutdown) {
+        return;
+    }
+
+    out->err(out, "Unknown reply type %d from pacemakerd",
+             response->reply_type);
+}
+
+/* Build the command-line option context: the common Pacemaker context
+ * (advertising the text and xml output formats) plus pacemakerd's own
+ * entries[]. The output option group is stored in *group.
+ */
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+    GOptionContext *ctx = pcmk__build_arg_context(args, "text (default), xml",
+                                                  group, NULL);
+
+    pcmk__add_main_args(ctx, entries);
+    return ctx;
+}
+
+/* Entry point for pacemakerd.
+ *
+ * Depending on the command line, this does one of the following:
+ *  - --features / --version: print the requested information and exit
+ *  - --shutdown: connect to an already-running pacemakerd over IPC, request a
+ *    shutdown, and wait (polling every 2 seconds, up to 900 times) for the
+ *    connection to drop
+ *  - otherwise: refuse to start if an existing instance answers on IPC, else
+ *    set up directories, the IPC server, the Corosync CFG connection (when
+ *    built with SUPPORT_COROSYNC) and signal handlers, start or schedule the
+ *    subdaemons, and run the main loop
+ *
+ * This function does not return normally; every path ends in crm_exit().
+ */
+int
+main(int argc, char **argv)
+{
+    int rc = pcmk_rc_ok;
+    crm_exit_t exit_code = CRM_EX_OK;
+
+    GError *error = NULL;
+
+    GOptionGroup *output_group = NULL;
+    pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+    gchar **processed_args = pcmk__cmdline_preproc(argv, "p");
+    GOptionContext *context = build_arg_context(args, &output_group);
+
+    bool old_instance_connected = false;
+
+    pcmk_ipc_api_t *old_instance = NULL;
+    qb_ipcs_service_t *ipcs = NULL;
+
+    subdaemon_check_progress = time(NULL);
+
+    setenv("LC_ALL", "C", 1); // Ensure logs are in a common language
+
+    crm_log_preinit(NULL, argc, argv);
+    mainloop_add_signal(SIGHUP, pcmk_ignore);
+    mainloop_add_signal(SIGQUIT, pcmk_sigquit);
+
+    pcmk__register_formats(output_group, formats);
+    if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+        exit_code = CRM_EX_USAGE;
+        goto done;
+    }
+
+    rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+    if ((rc != pcmk_rc_ok) || (out == NULL)) {
+        exit_code = CRM_EX_ERROR;
+        g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
+                    args->output_ty, pcmk_rc_str(rc));
+        goto done;
+    }
+
+    pcmk__force_args(context, &error, "%s --xml-simple-list", g_get_prgname());
+
+    pcmk__register_messages(out, fmt_functions);
+
+    // Informational options: print and exit without touching the cluster
+    if (options.features) {
+        out->message(out, "features");
+        exit_code = CRM_EX_OK;
+        goto done;
+    }
+
+    if (args->version) {
+        out->version(out, false);
+        goto done;
+    }
+
+    pcmk__set_env_option("mcp", "true");
+
+    // A shutdown request uses CLI-style logging; a daemon start uses full
+    // daemon logging
+    if (options.shutdown) {
+        pcmk__cli_init_logging("pacemakerd", args->verbosity);
+    } else {
+        crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+    }
+
+    crm_debug("Checking for existing Pacemaker instance");
+
+    rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd);
+    if (old_instance == NULL) {
+        out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc));
+        exit_code = pcmk_rc2exitc(rc);
+        goto done;
+    }
+
+    // Whether the connection attempt succeeded tells us whether another
+    // pacemakerd is already running
+    pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL);
+    rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync);
+    old_instance_connected = pcmk_ipc_is_connected(old_instance);
+
+    if (options.shutdown) {
+        if (old_instance_connected) {
+            rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name);
+            pcmk_dispatch_ipc(old_instance);
+
+            exit_code = pcmk_rc2exitc(rc);
+
+            if (exit_code != CRM_EX_OK) {
+                pcmk_free_ipc_api(old_instance);
+                goto done;
+            }
+
+            /* We get the ACK immediately, and the response right after that,
+             * but it might take a while for pacemakerd to get around to
+             * shutting down.  Wait for that to happen (with 30-minute timeout).
+             */
+            for (int i = 0; i < 900; i++) {
+                if (!pcmk_ipc_is_connected(old_instance)) {
+                    exit_code = CRM_EX_OK;
+                    pcmk_free_ipc_api(old_instance);
+                    goto done;
+                }
+
+                sleep(2);
+            }
+
+            exit_code = CRM_EX_TIMEOUT;
+            pcmk_free_ipc_api(old_instance);
+            goto done;
+
+        } else {
+            out->err(out, "Could not request shutdown "
+                     "of existing Pacemaker instance: %s", pcmk_rc_str(rc));
+            pcmk_free_ipc_api(old_instance);
+            exit_code = CRM_EX_DISCONNECT;
+            goto done;
+        }
+
+    } else if (old_instance_connected) {
+        pcmk_free_ipc_api(old_instance);
+        crm_err("Aborting start-up because active Pacemaker instance found");
+        exit_code = CRM_EX_FATAL;
+        goto done;
+    }
+
+    pcmk_free_ipc_api(old_instance);
+
+    /* Don't allow any accidental output after this point. */
+    if (out != NULL) {
+        out->finish(out, exit_code, true, NULL);
+        pcmk__output_free(out);
+        out = NULL;
+    }
+
+#ifdef SUPPORT_COROSYNC
+    if (mcp_read_config() == FALSE) {
+        crm_exit(CRM_EX_UNAVAILABLE);
+    }
+#endif
+
+    // OCF shell functions and cluster-glue need facility under different name
+    {
+        const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY);
+
+        if (!pcmk__str_eq(facility, PCMK__VALUE_NONE,
+                          pcmk__str_casei|pcmk__str_null_matches)) {
+            setenv("HA_LOGFACILITY", facility, 1);
+        }
+    }
+
+    crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
+               PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
+    mainloop = g_main_loop_new(NULL, FALSE);
+
+    remove_core_file_limit();
+    create_pcmk_dirs();
+    pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks);
+
+#ifdef SUPPORT_COROSYNC
+    /* Allows us to block shutdown */
+    if (!cluster_connect_cfg()) {
+        exit_code = CRM_EX_PROTOCOL;
+        goto done;
+    }
+#endif
+
+    // Export whether SBD was found via the PCMK_watchdog environment variable
+    if (pcmk__locate_sbd() > 0) {
+        setenv("PCMK_watchdog", "true", 1);
+        running_with_sbd = TRUE;
+    } else {
+        setenv("PCMK_watchdog", "false", 1);
+    }
+
+    switch (find_and_track_existing_processes()) {
+        case pcmk_rc_ok:
+            break;
+        case pcmk_rc_ipc_unauthorized:
+            exit_code = CRM_EX_CANTCREAT;
+            goto done;
+        default:
+            exit_code = CRM_EX_FATAL;
+            goto done;
+    };
+
+    mainloop_add_signal(SIGTERM, pcmk_shutdown);
+    mainloop_add_signal(SIGINT, pcmk_shutdown);
+
+    // With SBD startup synchronization, children are started later via
+    // startup_trigger; otherwise they are started right away
+    if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
+        crm_notice("Waiting for startup-trigger from SBD.");
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
+        startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
+    } else {
+        if (running_with_sbd) {
+            crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
+                     "by your SBD version) improve reliability of "
+                     "interworking between SBD & pacemaker.");
+        }
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+        init_children_processes(NULL);
+    }
+
+    crm_notice("Pacemaker daemon successfully started and accepting connections");
+    g_main_loop_run(mainloop);
+
+    // The main loop has ended: tear down what was set up above
+    if (ipcs) {
+        crm_trace("Closing IPC server");
+        mainloop_del_ipc_server(ipcs);
+        ipcs = NULL;
+    }
+
+    g_main_loop_unref(mainloop);
+#ifdef SUPPORT_COROSYNC
+    cluster_disconnect_cfg();
+#endif
+
+    // Common exit path: release command-line state, report any pending error,
+    // finish output if it is still open, and exit with exit_code
+done:
+    g_strfreev(processed_args);
+    pcmk__free_arg_context(context);
+
+    pcmk__output_and_clear_error(&error, out);
+
+    if (out != NULL) {
+        out->finish(out, exit_code, true, NULL);
+        pcmk__output_free(out);
+    }
+    pcmk__unregister_formats();
+    crm_exit(exit_code);
+}
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
new file mode 100644
index 0000000..b2a6864
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdint.h>
+
+// NOTE(review): not used in the sources visible here; presumably a cap on
+// subdaemon respawns -- confirm against pcmkd_subdaemons.c
+#define MAX_RESPAWN		100
+
+/* State shared between pacemakerd's source files (defined elsewhere) */
+extern GMainLoop *mainloop;                     // created and run by main()
+extern struct qb_ipcs_service_handlers mcp_ipc_callbacks; // passed to pcmk__serve_pacemakerd_ipc() by main()
+extern const char *pacemakerd_state;            // set by main() before starting children
+extern gboolean running_with_sbd;               // set TRUE by main() when pcmk__locate_sbd() > 0
+extern unsigned int shutdown_complete_state_reported_to;
+extern gboolean shutdown_complete_state_reported_client_closed;
+extern crm_trigger_t *shutdown_trigger;
+extern crm_trigger_t *startup_trigger;          // created by main() when waiting for SBD
+extern time_t subdaemon_check_progress;         // initialized to time(NULL) by main()
+
+// Implemented in pcmkd_corosync.c
+gboolean mcp_read_config(void);
+
+// Implemented in pcmkd_corosync.c; both return/expect the CFG connection state
+gboolean cluster_connect_cfg(void);
+void cluster_disconnect_cfg(void);
+// Not implemented in pacemakerd.c or pcmkd_corosync.c
+int find_and_track_existing_processes(void);
+gboolean init_children_processes(void *user_data);
+void restart_cluster_subdaemons(void);
+void pcmk_shutdown(int nsig);
+// Implemented in pcmkd_corosync.c
+void pcmkd_shutdown_corosync(void);
+bool pcmkd_corosync_connected(void);
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
new file mode 100644
index 0000000..2648756
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_corosync.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <sys/utsname.h>
+#include <sys/stat.h>           /* for calls to stat() */
+#include <libgen.h>             /* For basename() and dirname() */
+
+#include <sys/types.h>
+#include <pwd.h>                /* For getpwname() */
+
+#include <corosync/hdb.h>
+#include <corosync/cfg.h>
+#include <corosync/cpg.h>
+#include <corosync/cmap.h>
+
+#include <crm/cluster/internal.h>
+#include <crm/common/ipc.h>     /* for crm_ipc_is_authentic_process */
+#include <crm/common/mainloop.h>
+
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
+
+// Handle for the Corosync CFG connection; 0 when no connection is open
+static corosync_cfg_handle_t cfg_handle = 0;
+// Timer that retries the CFG connection after it is lost; NULL when inactive
+static mainloop_timer_t *reconnect_timer = NULL;
+
+/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
+
+/* Corosync CFG shutdown callback: log what kind of shutdown Corosync is
+ * requesting, then always answer "no".
+ */
+static void
+cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
+{
+    const char *kind = "optional";
+
+    if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) {
+        kind = "immediate";
+    } else if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) {
+        kind = "forced";
+    }
+    crm_info("Corosync wants to shut down: %s", kind);
+
+    /* Never allow corosync to shut down while we're running */
+    corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
+}
+
+// Callbacks passed to corosync_cfg_initialize() by cluster_connect_cfg()
+static corosync_cfg_callbacks_t cfg_callbacks = {
+    .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
+};
+
+/* Main-loop dispatch callback for the Corosync CFG file descriptor.
+ *
+ * user_data points to the CFG handle. All pending CFG events are dispatched;
+ * returns 0 if that succeeds and -1 otherwise.
+ */
+static int
+pcmk_cfg_dispatch(gpointer user_data)
+{
+    corosync_cfg_handle_t *cfg = (corosync_cfg_handle_t *) user_data;
+
+    return (corosync_cfg_dispatch(*cfg, CS_DISPATCH_ALL) == CS_OK)? 0 : -1;
+}
+
+/* Finalize the Corosync CFG connection, if one is open, and forget the
+ * handle.
+ */
+static void
+close_cfg(void)
+{
+    if (cfg_handle == 0) {
+        return;
+    }
+
+    /* When HAVE_COROSYNC_CFG_TRACKSTART is defined, we would ideally call
+     * corosync_cfg_trackstop(cfg_handle) here, but a bug in corosync 3.1.1
+     * and 3.1.2 makes it hang forever. Thankfully, it's not necessary since
+     * we exit immediately after this.
+     */
+    corosync_cfg_finalize(cfg_handle);
+    cfg_handle = 0;
+}
+
+/* Timer callback that tries to re-establish the Corosync CFG connection.
+ *
+ * On success the reconnect timer is deleted, the configuration is re-read and
+ * the cluster subdaemons are restarted, and G_SOURCE_REMOVE is returned.
+ * On failure G_SOURCE_CONTINUE is returned.
+ */
+static gboolean
+cluster_reconnect_cb(gpointer data)
+{
+    if (!cluster_connect_cfg()) {
+        crm_info("Cluster reconnect failed "
+                 "(connection will be reattempted once per second)");
+
+        /*
+         * In theory this will continue forever. In practice the CIB connection
+         * from attrd will timeout and shut down Pacemaker when it gets bored.
+         */
+        return G_SOURCE_CONTINUE;
+    }
+
+    mainloop_timer_del(reconnect_timer);
+    reconnect_timer = NULL;
+    crm_notice("Cluster reconnect succeeded");
+    mcp_read_config();
+    restart_cluster_subdaemons();
+    return G_SOURCE_REMOVE;
+}
+
+
+/* Main-loop destroy callback for the Corosync CFG file descriptor: finalize
+ * the handle and start a timer that calls cluster_reconnect_cb() every
+ * 1000ms.
+ */
+static void
+cfg_connection_destroy(gpointer user_data)
+{
+    crm_warn("Lost connection to cluster layer "
+             "(connection will be reattempted once per second)");
+
+    // Release the old handle before scheduling reconnection attempts
+    corosync_cfg_finalize(cfg_handle);
+    cfg_handle = 0;
+
+    reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE,
+                                         cluster_reconnect_cb, NULL);
+    mainloop_timer_start(reconnect_timer);
+}
+
+/* Close the Corosync CFG connection and delete any pending reconnect timer. */
+void
+cluster_disconnect_cfg(void)
+{
+    close_cfg();
+
+    if (reconnect_timer == NULL) {
+        return;
+    }
+
+    /* The mainloop should be gone by this point, so this isn't necessary,
+     * but cleaning up memory should make valgrind happier.
+     */
+    mainloop_timer_del(reconnect_timer);
+    reconnect_timer = NULL;
+}
+
+/* Run `code` and retry it while it reports a transient Corosync error.
+ *
+ * `code` must assign its result to a variable named `rc` in the caller's
+ * scope. While rc is CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL, `counter` is
+ * incremented, the macro sleeps for `counter` seconds, and `code` runs again
+ * as long as `counter` is below `max`. Any other result ends the loop
+ * immediately. `counter` must be a modifiable int lvalue (it is printed with
+ * %d); callers reset it to 0 before each use.
+ */
+#define cs_repeat(counter, max, code) do {		\
+	code;						\
+	if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {  \
+	    counter++;					\
+	    crm_debug("Retrying Corosync operation after %ds", counter);    \
+	    sleep(counter);				\
+	} else {                                        \
+            break;                                      \
+	}						\
+    } while(counter < max)
+
+/* Connect to the Corosync CFG service and add its file descriptor to the
+ * main loop.
+ *
+ * The steps are: initialize the CFG handle, fetch its file descriptor, verify
+ * that the peer process runs as uid 0 / gid 0, fetch the local node ID, enable
+ * shutdown tracking (when HAVE_COROSYNC_CFG_TRACKSTART is defined), and
+ * register the descriptor with the main loop. Calls that can report a
+ * transient error are retried via cs_repeat().
+ *
+ * Returns TRUE on success. On any failure the error is logged, the handle is
+ * finalized if it was initialized, cfg_handle is reset to 0 so that no later
+ * caller (close_cfg(), pcmkd_shutdown_corosync()) operates on a stale handle,
+ * and FALSE is returned.
+ */
+gboolean
+cluster_connect_cfg(void)
+{
+    cs_error_t rc;  // must be named rc: cs_repeat() refers to it
+    int fd = -1, retries = 0, rv;
+    uid_t found_uid = 0;
+    gid_t found_gid = 0;
+    pid_t found_pid = 0;
+    uint32_t nodeid;
+
+    static struct mainloop_fd_callbacks cfg_fd_callbacks = {
+        .dispatch = pcmk_cfg_dispatch,
+        .destroy = cfg_connection_destroy,
+    };
+
+    cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
+
+    if (rc != CS_OK) {
+        crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
+                 cs_strerror(rc), rc);
+        // Nothing to finalize, but don't leave a possibly half-set handle
+        cfg_handle = 0;
+        return FALSE;
+    }
+
+    rc = corosync_cfg_fd_get(cfg_handle, &fd);
+    if (rc != CS_OK) {
+        crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
+                 cs_strerror(rc), rc);
+        goto bail;
+    }
+
+    /* CFG provider run as root (in given user namespace, anyway)? */
+    rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid,
+                                      &found_uid, &found_gid);
+    if (rv == 0) {
+        crm_crit("Rejecting Corosync CFG provider because process %lld "
+                 "is running as uid %lld gid %lld, not root",
+                 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+                 (long long) found_uid, (long long) found_gid);
+        goto bail;
+    } else if (rv < 0) {
+        crm_crit("Could not authenticate Corosync CFG provider: %s "
+                 CRM_XS " rc=%d", strerror(-rv), -rv);
+        goto bail;
+    }
+
+    retries = 0;
+    cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
+    if (rc != CS_OK) {
+        crm_crit("Could not get local node ID from Corosync: %s "
+                 CRM_XS " rc=%d", cs_strerror(rc), rc);
+        goto bail;
+    }
+    crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
+
+#ifdef HAVE_COROSYNC_CFG_TRACKSTART
+    retries = 0;
+    cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
+    if (rc != CS_OK) {
+        crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
+                 cs_strerror(rc), rc);
+        goto bail;
+    }
+#endif
+
+    mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
+    return TRUE;
+
+  bail:
+    corosync_cfg_finalize(cfg_handle);
+    cfg_handle = 0; // the handle is no longer valid once finalized
+    return FALSE;
+}
+
+/* Ask Corosync to shut down immediately.
+ *
+ * Does nothing except log a warning if there is no CFG connection. If
+ * Corosync accepts the request, the CFG connection is closed; otherwise the
+ * failure is logged and the connection is left open.
+ */
+void
+pcmkd_shutdown_corosync(void)
+{
+    cs_error_t result;
+
+    if (cfg_handle == 0) {
+        crm_warn("Unable to shut down Corosync: No connection");
+        return;
+    }
+
+    crm_info("Asking Corosync to shut down");
+    result = corosync_cfg_try_shutdown(cfg_handle,
+                                       COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+    if (result != CS_OK) {
+        crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
+                 cs_strerror(result), result);
+        return;
+    }
+
+    close_cfg();
+}
+
+// Probe Corosync by opening a throwaway CPG connection; true if it is usable
+bool
+pcmkd_corosync_connected(void)
+{
+    cpg_handle_t local_handle = 0;
+    cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
+    int fd = -1;
+    bool connected = false;
+
+    if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
+        return false;
+    }
+
+    // The handle is only a probe: release it whether or not the fd lookup
+    // succeeds, so a failed check does not leak a CPG connection
+    connected = (cpg_fd_get(local_handle, &fd) == CS_OK);
+    cpg_finalize(local_handle);
+    return connected;
+}
+
+/* =::=::=::= Configuration =::=::=::= */
+static int
+get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
+               const char *fallback)
+{
+    int rc = 0, retries = 0;
+    // Fetch key as a string from CMAP; on any failure use fallback instead
+    cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
+    if (rc != CS_OK) {
+        crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
+        pcmk__str_update(value, fallback);
+    }
+    crm_trace("%s: %s", key, *value);
+    return rc;  // status of the lookup itself, even when fallback was used
+}
+
+/* Connect to Corosync CMAP, check that its provider runs as root, and apply
+ * Corosync-derived settings. Returns TRUE on success, FALSE on fatal error. */
+gboolean
+mcp_read_config(void)
+{
+    cs_error_t rc = CS_OK;
+    int retries = 0;
+    cmap_handle_t local_handle;
+    uint64_t config = 0;
+    int fd = -1;
+    uid_t found_uid = 0;
+    gid_t found_gid = 0;
+    pid_t found_pid = 0;
+    int rv;
+    enum cluster_type_e stack;
+
+    // Up to five connection attempts, sleeping one second longer each time
+    do {
+        rc = pcmk__init_cmap(&local_handle);
+        if (rc != CS_OK) {
+            retries++;
+            crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
+                     CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
+            sleep(retries);
+        } else {
+            break;
+        }
+    } while (retries < 5);
+
+    if (rc != CS_OK) {
+        crm_crit("Could not connect to Corosync CMAP: %s "
+                 CRM_XS " rc=%d", cs_strerror(rc), rc);
+        return FALSE;
+    }
+
+    rc = cmap_fd_get(local_handle, &fd);
+    if (rc != CS_OK) {
+        crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
+                 cs_strerror(rc), rc);
+        cmap_finalize(local_handle);
+        return FALSE;
+    }
+
+    /* CMAP provider run as root (in given user namespace, anyway)? */
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+                                            &found_uid, &found_gid))) {
+        crm_crit("Rejecting Corosync CMAP provider because process %lld "
+                 "is running as uid %lld gid %lld, not root",
+                 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+                 (long long) found_uid, (long long) found_gid);
+        cmap_finalize(local_handle);
+        return FALSE;
+    } else if (rv < 0) {
+        crm_crit("Could not authenticate Corosync CMAP provider: %s "
+                 CRM_XS " rc=%d", strerror(-rv), -rv);
+        cmap_finalize(local_handle);
+        return FALSE;
+    }
+
+    stack = get_cluster_type();
+    if (stack != pcmk_cluster_corosync) {
+        crm_crit("Expected Corosync cluster layer but detected %s "
+                 CRM_XS " stack=%d", name_for_cluster_type(stack), stack);
+        cmap_finalize(local_handle);    // every exit must release the handle
+        return FALSE;
+    }
+
+    crm_info("Reading configuration for %s stack",
+             name_for_cluster_type(stack));
+    pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync");
+    pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync");
+
+    // If debug logging is not configured, check whether corosync has it
+    if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
+        char *debug_enabled = NULL;
+
+        get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
+
+        if (crm_is_true(debug_enabled)) {
+            pcmk__set_env_option(PCMK__ENV_DEBUG, "1");
+            if (get_crm_log_level() < LOG_DEBUG) {
+                set_crm_log_level(LOG_DEBUG);
+            }
+        } else {
+            pcmk__set_env_option(PCMK__ENV_DEBUG, "0");
+        }
+
+        free(debug_enabled);
+    }
+
+    // Register the daemon user's group under Corosync's uidgid.gid.<gid> key
+    if(local_handle){
+        gid_t gid = 0;
+        if (pcmk_daemon_user(NULL, &gid) < 0) {
+            crm_warn("Could not authorize group with Corosync " CRM_XS
+                     " No group found for user %s", CRM_DAEMON_USER);
+        } else {
+            char key[PATH_MAX];
+            snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
+            rc = cmap_set_uint8(local_handle, key, 1);
+            if (rc != CS_OK) {
+                crm_warn("Could not authorize group with Corosync: %s " CRM_XS
+                         " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc);
+            }
+        }
+    }
+    cmap_finalize(local_handle);
+
+    return TRUE;
+}
diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c
new file mode 100644
index 0000000..7ed9899
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_messages.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+
+static GHashTable *pcmkd_handlers = NULL;
+
+// Node purge requests are a no-op here: log, acknowledge OK, send no reply
+static xmlNode *
+handle_node_cache_request(pcmk__request_t *request)
+{
+    pcmk__client_t *client = request->ipc_client;
+    crm_trace("Ignoring request from client %s to purge node "
+              "because peer cache is not used", pcmk__client_name(client));
+    pcmk__ipc_send_ack(client, request->ipc_id, request->ipc_flags,
+                       "ack", NULL, CRM_EX_OK);
+    return NULL;
+}
+
+static xmlNode *
+handle_ping_request(pcmk__request_t *request)
+{
+    xmlNode *msg = request->xml;
+    // Answers a status ping; returns whatever create_reply() built (maybe NULL)
+    const char *value = NULL;
+    xmlNode *ping = NULL;
+    xmlNode *reply = NULL;
+    const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+    /* Pinged for status */
+    crm_trace("Pinged from " F_CRM_SYS_FROM "='%s' " F_CRM_ORIGIN "='%s'",
+              pcmk__s(from, ""),
+              pcmk__s(crm_element_value(msg, F_CRM_ORIGIN), ""));
+    // Acknowledge first; the status itself travels in the reply built below
+    pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+                       "ack", NULL, CRM_EX_INDETERMINATE);
+    // Payload: addressed system name, pacemakerd state, last progress time
+    ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+    value = crm_element_value(msg, F_CRM_SYS_TO);
+    crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+    crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
+    crm_xml_add_ll(ping, XML_ATTR_TSTAMP,
+                   (long long) subdaemon_check_progress);
+    crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+    reply = create_reply(msg, ping);
+
+    free_xml(ping);
+
+    if (reply == NULL) {
+        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                            "Failed building ping reply for client %s",
+                            pcmk__client_name(request->ipc_client));
+    } else {
+        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    }
+
+    /* A ping from a sender whose name contains "sbd" also advances our state */
+    if (from && strstr(from, "sbd")) {
+        if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) {
+            if (pcmk__get_sbd_sync_resource_startup()) {
+                crm_notice("Shutdown-complete-state passed to SBD.");
+            }
+            // Remember the PID so pcmk_ipc_closed() can spot this client leaving
+            shutdown_complete_state_reported_to = request->ipc_client->pid;
+
+        } else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) {
+            crm_notice("Received startup-trigger from SBD.");
+            pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+            mainloop_set_trigger(startup_trigger);
+        }
+    }
+
+    return reply;
+}
+
+/* Handle CRM_OP_QUIT: privileged clients trigger a Pacemaker shutdown, all
+ * others are refused; either way the verdict is returned in the reply */
+static xmlNode *
+handle_shutdown_request(pcmk__request_t *request)
+{
+    xmlNode *msg = request->xml;
+    xmlNode *payload = NULL;
+    xmlNode *reply = NULL;
+    pcmk__client_t *client = request->ipc_client;
+
+    /* Only allow privileged users (i.e. root or hacluster) to shut down
+     * Pacemaker from the command line (or direct IPC), so that other users
+     * are forced to go through the CIB and have ACLs applied.
+     */
+    const bool privileged = pcmk_is_set(client->flags, pcmk__client_privileged);
+
+    pcmk__ipc_send_ack(client, request->ipc_id, request->ipc_flags,
+                       "ack", NULL, CRM_EX_INDETERMINATE);
+
+    payload = create_xml_node(NULL, XML_CIB_ATTR_SHUTDOWN);
+    if (!privileged) {
+        crm_warn("Ignoring shutdown request from unprivileged client %s",
+                 pcmk__client_name(client));
+        crm_xml_add_int(payload, XML_LRM_ATTR_OPSTATUS, CRM_EX_INSUFFICIENT_PRIV);
+    } else {
+        crm_notice("Shutting down in response to IPC request %s from %s",
+                   crm_element_value(msg, F_CRM_REFERENCE),
+                   crm_element_value(msg, F_CRM_ORIGIN));
+        crm_xml_add_int(payload, XML_LRM_ATTR_OPSTATUS, CRM_EX_OK);
+    }
+
+    reply = create_reply(msg, payload);
+    free_xml(payload);
+
+    if (reply != NULL) {
+        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    } else {
+        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                            "Failed building shutdown reply for client %s",
+                            pcmk__client_name(client));
+    }
+
+    if (privileged) {
+        pcmk_shutdown(15);  // start the shutdown only after the reply is built
+    }
+    return reply;
+}
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+    // Unrecognized operation: ack with invalid-parameter status, send no reply
+    pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+                       "ack", NULL, CRM_EX_INVALID_PARAM);
+    // The reason names the operation that was requested, not the client
+    pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+                        "Unknown IPC request type '%s' (bug?)", request->op);
+    return NULL;
+}
+
+// Build the operation-name -> handler table stored in pcmkd_handlers
+static void
+pcmkd_register_handlers(void)
+{
+    pcmk__server_command_t table[] = {
+        { CRM_OP_RM_NODE_CACHE, handle_node_cache_request },
+        { CRM_OP_PING, handle_ping_request },
+        { CRM_OP_QUIT, handle_shutdown_request },
+        { NULL, handle_unknown_request },   // NULL name: presumably catch-all
+    };
+    pcmkd_handlers = pcmk__register_handlers(table);
+}
+
+// Connection-accept hook: register the new client, or refuse it with -EIO
+static int32_t
+pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+    pcmk__client_t *client = NULL;
+    crm_trace("Connection %p", c);
+    client = pcmk__new_client(c, uid, gid);
+    return (client == NULL)? -EIO : 0;
+}
+
+/* Connection-closed hook; always returns 0. If the closing client is the
+ * one that was told "shutdown complete", let the shutdown continue. */
+static int32_t
+pcmk_ipc_closed(qb_ipcs_connection_t * c)
+{
+    pcmk__client_t *client = pcmk__find_client(c);
+
+    if (client != NULL) {
+        crm_trace("Connection %p", c);
+        if (client->pid == shutdown_complete_state_reported_to) {
+            shutdown_complete_state_reported_client_closed = TRUE;
+            if (shutdown_trigger != NULL) {
+                mainloop_set_trigger(shutdown_trigger);
+            }
+        }
+        pcmk__free_client(client);
+    }
+    return 0;
+}
+
+static void
+pcmk_ipc_destroy(qb_ipcs_connection_t * c)
+{
+    crm_trace("Connection %p", c);
+    (void) pcmk_ipc_closed(c);  // same cleanup as a close; result is unused
+}
+
+/* Message hook: parse one client request, run the matching handler, send
+ * any reply it produced, and log the outcome. Always returns 0. */
+static int32_t
+pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
+{
+    uint32_t id = 0;
+    uint32_t flags = 0;
+    xmlNode *msg = NULL;
+    pcmk__client_t *c = pcmk__find_client(qbc);
+
+    CRM_CHECK(c != NULL, return 0);
+
+    if (pcmkd_handlers == NULL) {
+        pcmkd_register_handlers();
+    }
+
+    msg = pcmk__client_data2xml(c, data, &id, &flags);
+    if (msg == NULL) {
+        pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+        return 0;
+    } else {
+        char *log_msg = NULL;
+        const char *reason = NULL;
+        xmlNode *reply = NULL;
+
+        pcmk__request_t request = {
+            .ipc_client     = c,
+            .ipc_id         = id,
+            .ipc_flags      = flags,
+            .peer           = NULL,
+            .xml            = msg,
+            .call_options   = 0,
+            .result         = PCMK__UNKNOWN_RESULT,
+        };
+
+        request.op = crm_element_value_copy(request.xml, F_CRM_TASK);
+        // msg is owned here, so release it before bailing out on a missing task
+        CRM_CHECK(request.op != NULL, free_xml(msg); return 0);
+
+        reply = pcmk__process_request(&request, pcmkd_handlers);
+
+        if (reply != NULL) {
+            pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event);
+            free_xml(reply);
+        }
+        reason = request.result.exit_reason;
+
+        log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
+                                    request.op, pcmk__request_origin_type(&request),
+                                    pcmk__request_origin(&request),
+                                    pcmk_exec_status_str(request.result.execution_status),
+                                    (reason == NULL)? "" : " (",
+                                    (reason == NULL)? "" : reason,
+                                    (reason == NULL)? "" : ")");
+
+        if (!pcmk__result_ok(&request.result)) {
+            crm_warn("%s", log_msg);
+        } else {
+            crm_debug("%s", log_msg);
+        }
+
+        free(log_msg);
+        pcmk__reset_request(&request);
+    }
+
+    free_xml(msg);
+    return 0;
+}
+
+struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
+    .connection_accept = pcmk_ipc_accept,
+    .connection_created = NULL, // no hook installed for this event
+    .msg_process = pcmk_ipc_dispatch,
+    .connection_closed = pcmk_ipc_closed,
+    .connection_destroyed = pcmk_ipc_destroy // forwards to pcmk_ipc_closed()
+};
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
new file mode 100644
index 0000000..3b08ecc
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <errno.h>
+#include <grp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <crm/cluster.h>
+#include <crm/msg_xml.h>
+
+typedef struct pcmk_child_s {
+    pid_t pid;                  // fork() result; reset to 0 on exit
+    int respawn_count;          // exits so far, compared against MAX_RESPAWN
+    bool respawn;               // false: do not restart after an exit
+    const char *name;
+    const char *uid;            // user name to run as (NULL: no user switch)
+    const char *command;        // executable path handed to execvp()
+    const char *endpoint;  /* IPC server name */
+    bool needs_cluster;         // respawn waits for the cluster connection
+    int check_count;            // consecutive pid-only liveness results
+
+    /* Anything below here will be dynamically initialized */
+    bool needs_retry;           // set when a respawn was postponed
+    bool active_before_startup;
+} pcmk_child_t;
+
+#define PCMK_PROCESS_CHECK_INTERVAL 1
+#define PCMK_PROCESS_CHECK_RETRIES  5   /* pid-only results before SIGKILL */
+#define SHUTDOWN_ESCALATION_PERIOD  180000  /* 3m (in ms) */
+
+/* Index into the array below */
+#define PCMK_CHILD_CONTROLD  5
+
+static pcmk_child_t pcmk_children[] = {    // stopped last-to-first at shutdown
+    {   // initializers: pid, respawn_count, respawn, name, uid,
+        0, 0, true,  "pacemaker-based", CRM_DAEMON_USER,
+        CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
+        true    // ... command, endpoint, then needs_cluster on this line
+    },
+    {
+        0, 0, true, "pacemaker-fenced", NULL,
+        CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
+        true
+    },
+    {
+        0, 0, true,  "pacemaker-execd", NULL,
+        CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
+        false
+    },
+    {
+        0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
+        CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
+        true
+    },
+    {
+        0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
+        CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
+        false
+    },
+    {   // index 5, i.e. PCMK_CHILD_CONTROLD
+        0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
+        CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
+        true
+    },
+};
+
+static char *opts_default[] = { NULL, NULL };   // argv for a plain exec
+static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };  // argv under valgrind
+
+crm_trigger_t *shutdown_trigger = NULL;
+crm_trigger_t *startup_trigger = NULL;
+time_t subdaemon_check_progress = 0;    // time of the last liveness progress
+
+// Whether we need root group access to talk to cluster layer
+static bool need_root_group = true;
+
+/* When contacted via pacemakerd-api by a client having sbd in
+ * the name we assume it is sbd-daemon which wants to know
+ * whether pacemakerd shut down gracefully.
+ * Thus when everything is shut down properly pacemakerd
+ * waits until it has reported the graceful completion of
+ * shutdown to sbd, and only when the sbd client closes the
+ * connection can we assume that the report has arrived
+ * properly so that pacemakerd can finally exit.
+ * The following two variables are used to track that handshake.
+ */
+unsigned int shutdown_complete_state_reported_to = 0;
+gboolean shutdown_complete_state_reported_client_closed = FALSE;
+
+/* state we report when asked via pacemakerd-api status-ping */
+const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
+gboolean running_with_sbd = FALSE; /* local copy */
+
+GMainLoop *mainloop = NULL;
+
+static gboolean fatal_error = FALSE;    // set on a CRM_EX_FATAL/PANIC child exit
+
+static int child_liveness(pcmk_child_t *child);
+static gboolean escalate_shutdown(gpointer data);
+static int start_child(pcmk_child_t * child);
+static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
+static void pcmk_process_exit(pcmk_child_t * child);
+static gboolean pcmk_shutdown_worker(gpointer user_data);
+static gboolean stop_child(pcmk_child_t * child, int signal);
+
+static bool
+pcmkd_cluster_connected(void)
+{
+#if !SUPPORT_COROSYNC
+    return true;    // built without Corosync support: nothing to probe
+#else
+    return pcmkd_corosync_connected();
+#endif
+}
+
+static gboolean
+check_next_subdaemon(gpointer user_data)
+{
+    static int next_child = 0;
+    int rc = child_liveness(&pcmk_children[next_child]);
+    // Each call checks one child; next_child remembers where to continue
+    crm_trace("Checked %s[%lld]: %s (%d)",
+              pcmk_children[next_child].name,
+              (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid),
+              pcmk_rc_str(rc), rc);
+    // Act on the liveness result; any unexpected code exits pacemakerd
+    switch (rc) {
+        case pcmk_rc_ok:
+            pcmk_children[next_child].check_count = 0;
+            subdaemon_check_progress = time(NULL);
+            break;
+        case pcmk_rc_ipc_pid_only: // This case: it was previously OK
+            pcmk_children[next_child].check_count++;
+            if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
+                crm_err("%s[%lld] is unresponsive to ipc after %d tries but "
+                        "we found the pid so have it killed that we can restart",
+                        pcmk_children[next_child].name,
+                        (long long) PCMK__SPECIAL_PID_AS_0(
+                            pcmk_children[next_child].pid),
+                        pcmk_children[next_child].check_count);
+                stop_child(&pcmk_children[next_child], SIGKILL);
+                if (pcmk_children[next_child].respawn) {
+                    /* as long as the respawn-limit isn't reached
+                       give it another round of check retries
+                     */
+                    pcmk_children[next_child].check_count = 0;
+                }
+            } else {
+                crm_notice("%s[%lld] is unresponsive to ipc after %d tries",
+                        pcmk_children[next_child].name,
+                        (long long) PCMK__SPECIAL_PID_AS_0(
+                            pcmk_children[next_child].pid),
+                        pcmk_children[next_child].check_count);
+                if (pcmk_children[next_child].respawn) {
+                    /* as long as the respawn-limit isn't reached
+                       and we haven't run out of connect retries
+                       we account this as progress we are willing
+                       to tell to sbd
+                     */
+                    subdaemon_check_progress = time(NULL);
+                }
+            }
+            /* go to the next child and see if
+               we can make progress there
+             */
+            break;
+        case pcmk_rc_ipc_unresponsive:
+            if (!pcmk_children[next_child].respawn) {
+                /* if a subdaemon is down and we don't want it
+                   to be restarted this is a success during
+                   shutdown. if it isn't restarted anymore
+                   due to MAX_RESPAWN it is
+                   rather no success.
+                 */
+                if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
+                    subdaemon_check_progress = time(NULL);
+                }
+            }
+            if (!pcmk_children[next_child].active_before_startup) {
+                crm_trace("found %s[%lld] missing - signal-handler "
+                          "will take care of it",
+                           pcmk_children[next_child].name,
+                           (long long) PCMK__SPECIAL_PID_AS_0(
+                            pcmk_children[next_child].pid));
+                break;
+            }
+            if (pcmk_children[next_child].respawn) {
+                crm_err("%s[%lld] terminated",
+                        pcmk_children[next_child].name,
+                        (long long) PCMK__SPECIAL_PID_AS_0(
+                            pcmk_children[next_child].pid));
+            } else {
+                /* orderly shutdown */
+                crm_notice("%s[%lld] terminated",
+                           pcmk_children[next_child].name,
+                           (long long) PCMK__SPECIAL_PID_AS_0(
+                                pcmk_children[next_child].pid));
+            }
+            pcmk_process_exit(&(pcmk_children[next_child]));
+            break;
+        default:
+            crm_exit(CRM_EX_FATAL);
+            break;  /* static analysis/noreturn */
+    }
+    // Advance round-robin, wrapping to the first child after the last
+    next_child++;
+    if (next_child >= PCMK__NELEM(pcmk_children)) {
+        next_child = 0;
+    }
+
+    return G_SOURCE_CONTINUE;
+}
+
+static gboolean
+escalate_shutdown(gpointer data)
+{
+    pcmk_child_t *child = data;
+    if (child->pid == PCMK__SPECIAL_PID) {
+        pcmk_process_exit(child);   // not signalled; just run exit handling
+        return FALSE;
+    }
+    if (child->pid != 0) {
+        /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
+        crm_err("Child %s not terminating in a timely manner, forcing", child->name);
+        stop_child(child, SIGSEGV);
+    }
+    return FALSE;   // FALSE from a g_timeout_add() callback: do not run again
+}
+
+/* Child-exit callback: log how the subdaemon ended, treat the fatal and
+ * panic exit codes specially, then let pcmk_process_exit() take over.
+ */
+static void
+pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
+{
+    const char *name = mainloop_child_name(p);
+    pcmk_child_t *child = mainloop_child_userdata(p);
+
+    if (signo != 0) {
+        // Ended by a signal: SIGKILL logs a warning, anything else an error
+        do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
+                   "%s[%d] terminated with signal %d (%s)%s",
+                   name, pid, signo, strsignal(signo),
+                   (core? " and dumped core" : ""));
+
+    } else if (exitcode == CRM_EX_OK) {
+        crm_info("%s[%d] exited with status %d (%s)",
+                 name, pid, exitcode, crm_exit_str(exitcode));
+
+    } else if (exitcode == CRM_EX_FATAL) {
+        // Never respawn this child, and shut all of Pacemaker down
+        crm_warn("Shutting cluster down because %s[%d] had fatal failure",
+                 name, pid);
+        child->respawn = false;
+        fatal_error = TRUE;
+        pcmk_shutdown(SIGTERM);
+
+    } else if (exitcode == CRM_EX_PANIC) {
+        // As for a fatal exit, but call pcmk__panic() before shutting down
+        crm_emerg("%s[%d] instructed the machine to reset", name, pid);
+        child->respawn = false;
+        fatal_error = TRUE;
+        pcmk__panic(__func__);
+        pcmk_shutdown(SIGTERM);
+
+    } else {
+        crm_err("%s[%d] exited with status %d (%s)",
+                name, pid, exitcode, crm_exit_str(exitcode));
+    }
+
+    // Common bookkeeping: clear the PID, count the exit, maybe respawn
+    pcmk_process_exit(child);
+}
+
+static void
+pcmk_process_exit(pcmk_child_t * child)
+{
+    child->pid = 0;
+    child->active_before_startup = false;
+    child->check_count = 0;
+    // Every exit counts; past MAX_RESPAWN the child is no longer respawned
+    child->respawn_count += 1;
+    if (child->respawn_count > MAX_RESPAWN) {
+        crm_err("Child respawn count exceeded by %s", child->name);
+        child->respawn = false;
+    }
+    // Decide what happens next; the first matching case below wins
+    if (shutdown_trigger) {
+        /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
+        mainloop_set_trigger(shutdown_trigger);
+
+    } else if (!child->respawn) {
+        /* nothing to do */
+
+    } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
+        crm_err("Rebooting system because of %s", child->name);
+        pcmk__panic(__func__);
+
+    } else if (child_liveness(child) == pcmk_rc_ok) {
+        crm_warn("One-off suppressing strict respawning of a child process %s,"
+                 " appears alright per %s IPC end-point",
+                 child->name, child->endpoint);
+
+    } else if (child->needs_cluster && !pcmkd_cluster_connected()) {
+        crm_notice("Not respawning %s subdaemon until cluster returns",
+                   child->name);
+        child->needs_retry = true;
+
+    } else {
+        crm_notice("Respawning %s subdaemon after unexpected exit",
+                   child->name);
+        start_child(child);
+    }
+}
+
+/* Stop the subdaemons one at a time, last pcmk_children entry first, then
+ * quit the main loop; returns TRUE early while a child is still running. */
+static gboolean
+pcmk_shutdown_worker(gpointer user_data)
+{
+    static int phase = PCMK__NELEM(pcmk_children) - 1;
+    static time_t next_log = 0;
+
+    if (phase == PCMK__NELEM(pcmk_children) - 1) {
+        crm_notice("Shutting down Pacemaker");
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
+    }
+
+    for (; phase >= 0; phase--) {
+        pcmk_child_t *child = &(pcmk_children[phase]);
+        if (child->pid != 0) {
+            time_t now = time(NULL);
+            if (child->respawn) {
+                if (child->pid == PCMK__SPECIAL_PID) {
+                    crm_warn("The process behind %s IPC cannot be"
+                             " terminated, so either wait the graceful"
+                             " period of %ld s for its native termination"
+                             " if it vitally depends on some other daemons"
+                             " going down in a controlled way already,"
+                             " or locate and kill the correct %s process"
+                             " on your own; set PCMK_fail_fast=1 to avoid"
+                             " this altogether next time around",
+                             child->name,
+                             (long) (SHUTDOWN_ESCALATION_PERIOD / 1000), // ms -> s
+                             child->command);
+                }
+                next_log = now + 30;
+                child->respawn = false;
+                stop_child(child, SIGTERM);
+                if (phase < PCMK_CHILD_CONTROLD) {
+                    g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
+                                  escalate_shutdown, child);
+                }
+            } else if (now >= next_log) {
+                next_log = now + 30;
+                crm_notice("Still waiting for %s to terminate "
+                           CRM_XS " pid=%lld",
+                           child->name, (long long) child->pid);
+            }
+            return TRUE;
+        }
+
+        /* cleanup */
+        crm_debug("%s confirmed stopped", child->name);
+        child->pid = 0;
+    }
+
+    crm_notice("Shutdown complete");
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
+    if (!fatal_error && running_with_sbd &&
+        pcmk__get_sbd_sync_resource_startup() &&
+        !shutdown_complete_state_reported_client_closed) {
+        crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
+        return TRUE;
+    }
+
+    {
+        const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
+        if(delay) {
+            sync();
+            pcmk__sleep_ms(crm_get_msec(delay));
+        }
+    }
+
+    g_main_loop_quit(mainloop);
+
+    if (fatal_error) {
+        crm_notice("Shutting down and staying down after fatal error");
+#if SUPPORT_COROSYNC     // value test, matching pcmkd_cluster_connected()
+        pcmkd_shutdown_corosync();
+#endif
+        crm_exit(CRM_EX_FATAL);
+    }
+
+    return TRUE;
+}
+
+/* TODO once libqb is taught to juggle with IPC end-points carried over as
+        bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
+        it shall hand over these descriptors here if/once they are successfully
+        pre-opened in (presumably) child_liveness(), to avoid any remaining
+        room for races */
+// Fork and exec one subdaemon. \return Standard Pacemaker return code
+static int
+start_child(pcmk_child_t * child)
+{
+    uid_t uid = 0;
+    gid_t gid = 0;
+    gboolean use_valgrind = FALSE;
+    gboolean use_callgrind = FALSE;
+    const char *env_valgrind = getenv("PCMK_valgrind_enabled");
+    const char *env_callgrind = getenv("PCMK_callgrind_enabled");
+
+    child->active_before_startup = false;
+    child->check_count = 0;
+
+    if (child->command == NULL) {
+        crm_info("Nothing to do for child \"%s\"", child->name);
+        return pcmk_rc_ok;
+    }
+    // Each variable enables the tool if it is true or contains the child's name
+    if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
+        use_callgrind = TRUE;
+        use_valgrind = TRUE;
+
+    } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
+        use_callgrind = TRUE;
+        use_valgrind = TRUE;
+
+    } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
+        use_valgrind = TRUE;
+
+    } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
+        use_valgrind = TRUE;
+    }
+
+    if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
+        crm_warn("Cannot enable valgrind for %s:"
+                 " The location of the valgrind binary is unknown", child->name);
+        use_valgrind = FALSE;
+    }
+
+    if (child->uid) {
+        if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
+            crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
+            return EACCES;
+        }
+        crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
+    }
+    // Parent registers the PID and returns; the child execs or exits below
+    child->pid = fork();
+    CRM_ASSERT(child->pid != -1);
+
+    if (child->pid > 0) {
+        /* parent */
+        mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
+
+        crm_info("Forked child %lld for process %s%s",
+                 (long long) child->pid, child->name,
+                 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
+        return pcmk_rc_ok;
+
+    } else {
+        /* Start a new session */
+        (void)setsid();
+
+        /* Setup the two alternate arg arrays */
+        opts_vgrind[0] = strdup(VALGRIND_BIN);
+        if (use_callgrind) {
+            opts_vgrind[1] = strdup("--tool=callgrind");
+            opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
+            opts_vgrind[3] = strdup(child->command);
+            opts_vgrind[4] = NULL;
+        } else {
+            opts_vgrind[1] = strdup(child->command);
+            opts_vgrind[2] = NULL;
+            opts_vgrind[3] = NULL;
+            opts_vgrind[4] = NULL;
+        }
+        opts_default[0] = strdup(child->command);
+
+        if(gid) {
+            // Drop root group access if not needed
+            if (!need_root_group && (setgid(gid) < 0)) {
+                crm_warn("Could not set group to %d: %s", gid, strerror(errno));
+            }
+
+            /* Initialize supplementary groups to only those always granted to
+             * the user, plus haclient (so we can access IPC).
+             */
+            if (initgroups(child->uid, gid) < 0) {
+                crm_err("Cannot initialize groups for %s: %s (%d)",
+                        child->uid, pcmk_rc_str(errno), errno);
+            }
+        }
+        // Switch user only after the group changes above
+        if (uid && setuid(uid) < 0) {
+            crm_warn("Could not set user to %s (id %d): %s",
+                     child->uid, uid, strerror(errno));
+        }
+
+        pcmk__close_fds_in_child(true);
+
+        pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
+        pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
+        pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
+        // execvp() returns only on failure, falling through to the error below
+        if (use_valgrind) {
+            (void)execvp(VALGRIND_BIN, opts_vgrind);
+        } else {
+            (void)execvp(child->command, opts_default);
+        }
+        crm_crit("Could not execute %s: %s", child->command, strerror(errno));
+        crm_exit(CRM_EX_FATAL);
+    }
+    return pcmk_rc_ok;          /* never reached */
+}
+
+/*!
+ * \internal
+ * \brief Check the liveness of the child based on IPC name and PID if tracked
+ *
+ * \param[in,out] child  Child tracked data
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
+ *       indicating that no trace of IPC liveness was detected,
+ *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
+ *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
+ *       the child is up by PID but not IPC end-point (possibly starting).
+ *       EINVAL is returned when the child has neither an IPC end-point nor
+ *       a trackable PID.
+ * \note This function doesn't modify any of \p child members but \c pid,
+ *       and does not actively manipulate processes, except for invoking
+ *       \c stop_child in one particular case (a different authentic process
+ *       holds the IPC end-point while the tracked PID is still active).
+ */
+static int
+child_liveness(pcmk_child_t *child)
+{
+    // IDs of the daemon user, filled in only when the child has a uid set
+    uid_t cl_uid = 0;
+    gid_t cl_gid = 0;
+    const uid_t root_uid = 0;
+    const gid_t root_gid = 0;
+    // Point at whichever uid/gid pair the IPC end-point owner is checked against
+    const uid_t *ref_uid;
+    const gid_t *ref_gid;
+    int rc = pcmk_rc_ipc_unresponsive;
+    pid_t ipc_pid = 0;  // PID reported for the IPC end-point (0 if none)
+
+    if (child->endpoint == NULL
+            && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
+        crm_err("Cannot track child %s for missing both API end-point and PID",
+                child->name);
+        rc = EINVAL; // Misuse of function when child is not trackable
+
+    } else if (child->endpoint != NULL) {
+        int legacy_rc = pcmk_ok;
+
+        if (child->uid == NULL) {
+            // No user configured for this child: expect root to own the IPC
+            ref_uid = &root_uid;
+            ref_gid = &root_gid;
+        } else {
+            /* NOTE(review): this looks up the daemon user rather than
+             * child->uid itself -- presumably every child with a uid runs as
+             * CRM_DAEMON_USER; confirm.
+             */
+            ref_uid = &cl_uid;
+            ref_gid = &cl_gid;
+            legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
+        }
+
+        if (legacy_rc < 0) {
+            rc = pcmk_legacy2rc(legacy_rc);
+            crm_err("Could not find user and group IDs for user %s: %s "
+                    CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
+        } else {
+            rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
+                                                       *ref_uid, *ref_gid,
+                                                       &ipc_pid);
+            if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
+                if (child->pid <= 0) {
+                    /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
+                     * initializes a new child. If rc is
+                     * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
+                     * investigate further.
+                     */
+                    child->pid = ipc_pid;
+                } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
+                    /* An unexpected (but authorized) process is responding to
+                     * IPC. Investigate further.
+                     */
+                    rc = pcmk_rc_ipc_unresponsive;
+                }
+            }
+        }
+    }
+
+    if (rc == pcmk_rc_ipc_unresponsive) {
+        /* If we get here, a child without IPC is being tracked, no IPC liveness
+         * has been detected, or IPC liveness has been detected with an
+         * unexpected (but authorized) process. This is safe on FreeBSD, since
+         * the only change possible there is from a proper child's PID to the
+         * "special" PID of 1, which stands in for a more loosely related
+         * process.
+         */
+        // Check whether the PID we are tracking is itself still active
+        int ret = pcmk__pid_active(child->pid, child->name);
+
+        if (ipc_pid && ((ret != pcmk_rc_ok)
+                        || ipc_pid == PCMK__SPECIAL_PID
+                        || (pcmk__pid_active(ipc_pid,
+                                             child->name) == pcmk_rc_ok))) {
+            /* An unexpected (but authorized) process was detected at the IPC
+             * endpoint, and either it is active, or the child we're tracking is
+             * not.
+             */
+
+            if (ret == pcmk_rc_ok) {
+                /* The child we're tracking is active. Kill it, and adopt the
+                 * detected process. This assumes that our children don't fork
+                 * (thus getting a different PID owning the IPC), but rather the
+                 * tracking got out of sync because of some means external to
+                 * Pacemaker, and adopting the detected process is better than
+                 * killing it and possibly having to spawn a new child.
+                 */
+                /* The tracked PID does not possess the IPC after all
+                 * (what about corosync CPG?)
+                 */
+                stop_child(child, SIGKILL);
+            }
+            rc = pcmk_rc_ok;
+            child->pid = ipc_pid;
+        } else if (ret == pcmk_rc_ok) {
+            // Our tracked child's PID was found active, but not its IPC
+            rc = pcmk_rc_ipc_pid_only;
+        } else if ((child->pid == 0) && (ret == EINVAL)) {
+            // FreeBSD can return EINVAL
+            rc = pcmk_rc_ipc_unresponsive;
+        } else {
+            // Map the PID check result onto the IPC-oriented return codes
+            switch (ret) {
+                case EACCES:
+                    rc = pcmk_rc_ipc_unauthorized;
+                    break;
+                case ESRCH:
+                    rc = pcmk_rc_ipc_unresponsive;
+                    break;
+                default:
+                    rc = ret;
+                    break;
+            }
+        }
+    }
+    return rc;
+}
+
+/*!
+ * \internal
+ * \brief Initial one-off check of the pre-existing "child" processes
+ *
+ * By "child" process, we mean a subdaemon that defines an API end-point
+ * (all of them do as of this writing) -- any others are skipped,
+ * as they are deemed to have no such shared resources to cause conflicts about,
+ * hence they can presumably be started anew without hesitation.
+ * If that won't hold true in the future, the concept of a shared resource
+ * will have to be generalized beyond the API end-point.
+ *
+ * For boundary cases where the "child" is still starting (IPC end-point is yet
+ * to be witnessed), or more rarely (practically FreeBSD only), when there's
+ * a pre-existing "untrackable" authentic process, we give the situation some
+ * time to possibly unfold in the right direction, meaning that said socket
+ * will appear or the unattainable process will disappear per the observable
+ * IPC, respectively.
+ *
+ * On success, a periodic \c check_next_subdaemon timer is also registered.
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note Since this gets run at the very start, \c respawn_count fields
+ *       for particular children get temporarily overloaded with "rounds
+ *       of waiting" tracking, restored once we are about to finish with
+ *       success (i.e. returning pcmk_rc_ok) and will remain unrestored
+ *       otherwise.  One way to suppress liveness detection logic for
+ *       particular child is to set the said value to a negative number.
+ */
+#define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
+int
+find_and_track_existing_processes(void)
+{
+    bool wait_in_progress;  // whether any child needs another round of waiting
+    int rc;
+    size_t i, rounds;
+
+    for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
+        wait_in_progress = false;
+        for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+
+            /* Skip children without an IPC end-point, and those with a
+             * negative respawn_count (detection suppressed, or the child was
+             * already adopted in an earlier round -- see the -1 set below)
+             */
+            if ((pcmk_children[i].endpoint == NULL)
+                || (pcmk_children[i].respawn_count < 0)) {
+                continue;
+            }
+
+            rc = child_liveness(&pcmk_children[i]);
+            if (rc == pcmk_rc_ipc_unresponsive) {
+                /* As a speculation, don't give up if there are more rounds to
+                 * come for other reasons, but don't artificially wait just
+                 * because of this, since we would preferably start ASAP.
+                 */
+                continue;
+            }
+
+            // Temporarily record the current waiting round in respawn_count
+            pcmk_children[i].respawn_count = rounds;
+            switch (rc) {
+                case pcmk_rc_ok:
+                    if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
+                        if (crm_is_true(getenv("PCMK_fail_fast"))) {
+                            crm_crit("Cannot reliably track pre-existing"
+                                     " authentic process behind %s IPC on this"
+                                     " platform and PCMK_fail_fast requested",
+                                     pcmk_children[i].endpoint);
+                            return EOPNOTSUPP;
+                        } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+                            /* Last round: accept the untrackable process and
+                             * fall through to the adoption code below.
+                             *
+                             * NOTE(review): the two messages here format the
+                             * child's name for "%s IPC", where the sibling
+                             * messages format the endpoint -- confirm which
+                             * is intended.
+                             */
+                            crm_notice("Assuming pre-existing authentic, though"
+                                       " on this platform untrackable, process"
+                                       " behind %s IPC is stable (was in %d"
+                                       " previous samples) so rather than"
+                                       " bailing out (PCMK_fail_fast not"
+                                       " requested), we just switch to a less"
+                                       " optimal IPC liveness monitoring"
+                                       " (not very suitable for heavy load)",
+                                       pcmk_children[i].name, WAIT_TRIES - 1);
+                            crm_warn("The process behind %s IPC cannot be"
+                                     " terminated, so the overall shutdown"
+                                     " will get delayed implicitly (%ld s),"
+                                     " which serves as a graceful period for"
+                                     " its native termination if it vitally"
+                                     " depends on some other daemons going"
+                                     " down in a controlled way already",
+                                     pcmk_children[i].name,
+                                     (long) SHUTDOWN_ESCALATION_PERIOD);
+                        } else {
+                            // Not the last round yet: sample again later
+                            wait_in_progress = true;
+                            crm_warn("Cannot reliably track pre-existing"
+                                     " authentic process behind %s IPC on this"
+                                     " platform, can still disappear in %d"
+                                     " attempt(s)", pcmk_children[i].endpoint,
+                                     WAIT_TRIES - pcmk_children[i].respawn_count);
+                            continue;
+                        }
+                    }
+                    crm_notice("Tracking existing %s process (pid=%lld)",
+                               pcmk_children[i].name,
+                               (long long) PCMK__SPECIAL_PID_AS_0(
+                                               pcmk_children[i].pid));
+                    /* A negative value makes any remaining rounds skip this
+                     * child (see the check at the top of the inner loop)
+                     */
+                    pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
+                    pcmk_children[i].active_before_startup = true;
+                    break;
+                case pcmk_rc_ipc_pid_only:
+                    // PID is active but its IPC end-point is not (yet) there
+                    if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+                        crm_crit("%s IPC end-point for existing authentic"
+                                 " process %lld did not (re)appear",
+                                 pcmk_children[i].endpoint,
+                                 (long long) PCMK__SPECIAL_PID_AS_0(
+                                                 pcmk_children[i].pid));
+                        return rc;
+                    }
+                    wait_in_progress = true;
+                    crm_warn("Cannot find %s IPC end-point for existing"
+                             " authentic process %lld, can still (re)appear"
+                             " in %d attempts (?)",
+                             pcmk_children[i].endpoint,
+                             (long long) PCMK__SPECIAL_PID_AS_0(
+                                             pcmk_children[i].pid),
+                             WAIT_TRIES - pcmk_children[i].respawn_count);
+                    continue;
+                default:
+                    // Any other result is fatal for the detection as a whole
+                    crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
+                             pcmk_children[i].name, pcmk_rc_str(rc), rc);
+                    return rc;
+            }
+        }
+        if (!wait_in_progress) {
+            break;
+        }
+        pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
+    }
+    /* Detection finished without a fatal result: clear the overloaded counter
+     * of every child, including those marked with -1 above
+     */
+    for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+        pcmk_children[i].respawn_count = 0;  /* restore pristine state */
+    }
+
+    // Register check_next_subdaemon as a periodic timer callback
+    g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
+                          NULL);
+    return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Start every subdaemon that is not already being tracked
+ *
+ * Afterwards, mark the environment so that later starts are recognizable as
+ * respawns, and switch the pacemakerd state to "running".
+ *
+ * \param[in] user_data  Ignored
+ *
+ * \return Always TRUE
+ */
+gboolean
+init_children_processes(void *user_data)
+{
+    const size_t n_children = PCMK__NELEM(pcmk_children);
+
+    /* With Corosync, root group access can be dropped: uidgid.gid.${gid}=1 is
+     * set via CMAP, and that is what lets these processes connect to corosync.
+     */
+    if (is_corosync_cluster()) {
+        need_root_group = false;
+    }
+
+    // A nonzero PID means the child is already tracked; launch only the rest
+    for (size_t idx = 0; idx < n_children; idx++) {
+        if (pcmk_children[idx].pid == 0) {
+            start_child(&(pcmk_children[idx]));
+        }
+    }
+
+    /* Any daemon started after this point is a respawn rather than part of
+     * node start, which may be useful for the daemons to know.
+     */
+    setenv("PCMK_respawned", "true", 1);
+
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
+    return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Request shutdown by firing the shutdown trigger
+ *
+ * The trigger running \c pcmk_shutdown_worker is created on first use and
+ * reused on later calls.
+ *
+ * \param[in] nsig  Ignored
+ */
+void
+pcmk_shutdown(int nsig)
+{
+    if (!shutdown_trigger) {
+        // First request: register the worker with the main loop
+        shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
+                                                pcmk_shutdown_worker,
+                                                NULL);
+    }
+
+    mainloop_set_trigger(shutdown_trigger);
+}
+
+/*!
+ * \internal
+ * \brief Respawn subdaemons that are flagged for retry and not running
+ *
+ * A child is considered only if its \c needs_retry flag is set and it has no
+ * PID recorded. The flag is cleared once the child has been started
+ * successfully; if starting fails, it stays set so that a later call can try
+ * again.
+ */
+void
+restart_cluster_subdaemons(void)
+{
+    for (size_t i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+        if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
+            continue;
+        }
+
+        crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
+
+        /* start_child() returns a standard Pacemaker return code, so success
+         * is pcmk_rc_ok rather than a "true" value. Testing the raw return
+         * for truthiness would clear the flag only when the start failed.
+         */
+        if (start_child(&pcmk_children[i]) == pcmk_rc_ok) {
+            pcmk_children[i].needs_retry = false;
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Send a signal to a tracked child process
+ *
+ * \param[in] child   Child to signal
+ * \param[in] signal  Signal to send (0 means SIGTERM)
+ *
+ * \return Always TRUE
+ *
+ * \note Nothing is sent if the child has no command, has no positive PID, or
+ *       is tracked under the "special" PID.
+ */
+static gboolean
+stop_child(pcmk_child_t * child, int signal)
+{
+    const int signo = (signal == 0)? SIGTERM : signal;
+
+    /* Why skip a PID of 1?
+     * - FreeBSD: it is how an untrackable process behind IPC is masqueraded
+     * - elsewhere: it designates the "init" task; in particular, with systemd
+     *   socket-based activation, this is pretty real
+     */
+    if ((child->pid == PCMK__SPECIAL_PID) || (child->command == NULL)) {
+        crm_debug("Nothing to do for child \"%s\" (process %lld)",
+                  child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+        return TRUE;
+    }
+
+    if (child->pid < 1) {
+        crm_trace("Client %s not running", child->name);
+        return TRUE;
+    }
+
+    errno = 0;
+    if (kill(child->pid, signo) != 0) {
+        crm_err("Could not stop %s (process %lld) with signal %d: %s",
+                child->name, (long long) child->pid, signo, strerror(errno));
+    } else {
+        crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
+                   child->name, signo, (long long) child->pid);
+    }
+
+    return TRUE;
+}
+