diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
commit | e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch) | |
tree | a6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/pacemakerd | |
parent | Initial commit. (diff) | |
download | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip |
Adding upstream version 2.1.6. upstream/2.1.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/pacemakerd')
-rw-r--r-- | daemons/pacemakerd/Makefile.am | 37 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemaker.combined.upstart.in | 67 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemaker.service.in | 103 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemaker.upstart.in | 33 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemakerd.8.inc | 5 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemakerd.c | 483 | ||||
-rw-r--r-- | daemons/pacemakerd/pacemakerd.h | 35 | ||||
-rw-r--r-- | daemons/pacemakerd/pcmkd_corosync.c | 371 | ||||
-rw-r--r-- | daemons/pacemakerd/pcmkd_messages.c | 278 | ||||
-rw-r--r-- | daemons/pacemakerd/pcmkd_subdaemons.c | 888 |
10 files changed, 2300 insertions, 0 deletions
diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am new file mode 100644 index 0000000..fc0e014 --- /dev/null +++ b/daemons/pacemakerd/Makefile.am @@ -0,0 +1,37 @@ +# +# Copyright 2004-2021 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +sbin_PROGRAMS = pacemakerd + +if BUILD_SYSTEMD +systemdsystemunit_DATA = pacemaker.service +endif + +EXTRA_DIST = pacemakerd.8.inc + +## SOURCES + +noinst_HEADERS = pacemakerd.h + +pacemakerd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemakerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la +pacemakerd_LDADD += $(CLUSTERLIBS) +pacemakerd_SOURCES = pacemakerd.c +if BUILD_CS_SUPPORT +pacemakerd_SOURCES += pcmkd_corosync.c +endif +pacemakerd_SOURCES += pcmkd_messages.c +pacemakerd_SOURCES += pcmkd_subdaemons.c + +CLEANFILES = $(man8_MANS) diff --git a/daemons/pacemakerd/pacemaker.combined.upstart.in b/daemons/pacemakerd/pacemaker.combined.upstart.in new file mode 100644 index 0000000..af59ff0 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.combined.upstart.in @@ -0,0 +1,67 @@ +# pacemaker-corosync - High-Availability cluster +# +# Starts Corosync cluster engine and Pacemaker cluster manager. + +# if you use automatic start, uncomment the line below. +#start on started local and runlevel [2345] + +stop on runlevel [0123456] +kill timeout 3600 +respawn + +env prog=pacemakerd +env sysconf=@CONFIGDIR@/pacemaker +env rpm_lockdir=@localstatedir@/lock/subsys +env deb_lockdir=@localstatedir@/lock + +script + [ -f "$sysconf" ] && . 
"$sysconf" + exec $prog +end script + +pre-start script + pidof corosync || start corosync + + # if you use corosync-notifyd, uncomment the line below. + #start corosync-notifyd + + # give it time to fail. + sleep 2 + pidof corosync || { exit 1; } + + # if you use crm_mon, uncomment the line below. + #start crm_mon +end script + +post-start script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + touch "$LOCK_FILE" + pidof $prog > "@localstatedir@/run/$prog.pid" +end script + +post-stop script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + rm -f "$LOCK_FILE" + rm -f "@localstatedir@/run/$prog.pid" + + # if you use corosync-notifyd, uncomment the line below. + #stop corosync-notifyd || true + + # if you use watchdog of corosync, uncomment the line below. + #pidof corosync || false + + pidof pacemaker-controld || stop corosync + + # if you want to reboot a machine by watchdog of corosync when + # pacemakerd disappeared unexpectedly, uncomment the line below + # and invalidate above "respawn" stanza. + #pidof pacemaker-controld && killall -q -9 corosync + + # if you use crm_mon, uncomment the line below. + #stop crm_mon + +end script diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in new file mode 100644 index 0000000..3fd53d9 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.service.in @@ -0,0 +1,103 @@ +[Unit] +Description=Pacemaker High Availability Cluster Manager +Documentation=man:pacemakerd +Documentation=https://clusterlabs.org/pacemaker/doc/ + +# DefaultDependencies takes care of sysinit.target, +# basic.target, and shutdown.target + +# We need networking to bind to a network address. 
It is recommended not to +# use Wants or Requires with network.target, and not to use +# network-online.target for server daemons. +After=network.target + +# Time syncs can make the clock jump backward, which messes with logging +# and failure timestamps, so wait until it's done. +After=time-sync.target + +# Managing systemd resources requires DBus. +After=dbus.service +Wants=dbus.service + +# Some OCF resources may have dependencies that aren't managed by the cluster; +# these must be started before Pacemaker and stopped after it. The +# resource-agents package provides this target, which lets system administrators +# add drop-ins for those dependencies. +After=resource-agents-deps.target +Wants=resource-agents-deps.target + +After=syslog.service +After=rsyslog.service +After=corosync.service +Requires=corosync.service + +# If Pacemaker respawns repeatedly, give up after this many tries in this time +StartLimitBurst=5 +StartLimitIntervalSec=25s + +[Install] +WantedBy=multi-user.target + + +[Service] +Type=simple +KillMode=process +NotifyAccess=main +EnvironmentFile=-@CONFIGDIR@/pacemaker +EnvironmentFile=-@CONFIGDIR@/sbd +SuccessExitStatus=100 + +ExecStart=@sbindir@/pacemakerd + +# Systemd v227 and above can limit the number of processes spawned by a +# service. That is a bad idea for an HA cluster resource manager, so disable it +# by default. The administrator can create a local override if they really want +# a limit. If your systemd version does not support TasksMax, and you want to +# get rid of the resulting log warnings, comment out this option. +TasksMax=infinity + +# If pacemakerd doesn't stop, it's probably waiting on a cluster +# resource. 
Sending -KILL will just get the node fenced +SendSIGKILL=no + +# Systemd's default of respawning a failed service after 100ms is too aggressive +RestartSec=1s + +# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the +# admin wants to stop the cluster while pacemakerd is not running, it +# might be a good idea to enable the ExecStopPost directive below. +# +# However, the node will likely end up being fenced as a result, so it's +# not enabled by default. +# +# ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd pacemaker-based \ +# pacemaker-controld pacemaker-execd pacemaker-fenced \ +# pacemaker-schedulerd + +# If you want Corosync to stop whenever Pacemaker is stopped, +# uncomment the next line too: +# +# ExecStopPost=/bin/sh -c 'pidof pacemaker-controld || killall -TERM corosync' + +# Pacemaker will restart along with Corosync if Corosync is stopped while +# Pacemaker is running. +# In this case, if you want to be fenced always (if you do not want to restart) +# uncomment ExecStopPost below. +# +# ExecStopPost=/bin/sh -c 'pidof corosync || \ +# /usr/bin/systemctl --no-block stop pacemaker' + +# When the service functions properly, it will wait to exit until all resources +# have been stopped on the local node, and potentially across all nodes that +# are shutting down. The default of 30min should cover most typical cluster +# configurations, but it may need an increase to adapt to local conditions +# (e.g. a large, clustered database could conceivably take longer to stop). 
+TimeoutStopSec=30min +TimeoutStartSec=60s + +# Restart options include: no, on-success, on-failure, on-abort or always +Restart=on-failure + +# crm_perror() writes directly to stderr, so ignore it here +# to avoid double-logging with the wrong format +StandardError=null diff --git a/daemons/pacemakerd/pacemaker.upstart.in b/daemons/pacemakerd/pacemaker.upstart.in new file mode 100644 index 0000000..7a54bc0 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.upstart.in @@ -0,0 +1,33 @@ +# pacemaker - High-Availability cluster resource manager +# +# Starts pacemakerd + +stop on runlevel [0123456] +kill timeout 3600 +respawn + +env prog=pacemakerd +env sysconf=@CONFIGDIR@/pacemaker +env rpm_lockdir=@localstatedir@/lock/subsys +env deb_lockdir=@localstatedir@/lock + +script + [ -f "$sysconf" ] && . "$sysconf" + exec $prog +end script + +post-start script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + touch "$LOCK_FILE" + pidof $prog > "@localstatedir@/run/$prog.pid" +end script + +post-stop script + [ -f "$sysconf" ] && . 
"$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + rm -f "$LOCK_FILE" + rm -f "@localstatedir@/run/$prog.pid" +end script diff --git a/daemons/pacemakerd/pacemakerd.8.inc b/daemons/pacemakerd/pacemakerd.8.inc new file mode 100644 index 0000000..902af4e --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.8.inc @@ -0,0 +1,5 @@ +[synopsis] +pacemakerd [options] + +/subsidiary Pacemaker daemons/ +.SH OPTIONS diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c new file mode 100644 index 0000000..9f77ccc --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.c @@ -0,0 +1,483 @@ +/* + * Copyright 2010-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include "pacemakerd.h" + +#include <pwd.h> +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> + +#include <crm/crm.h> /* indirectly: CRM_EX_* */ +#include <crm/msg_xml.h> +#include <crm/common/mainloop.h> +#include <crm/common/cmdline_internal.h> +#include <crm/common/ipc_pacemakerd.h> +#include <crm/common/output_internal.h> +#include <crm/cluster/internal.h> +#include <crm/cluster.h> + +#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons" + +struct { + gboolean features; + gboolean foreground; + gboolean shutdown; + gboolean standby; +} options; + +static pcmk__output_t *out = NULL; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + 
+PCMK__OUTPUT_ARGS("features") +static int +pacemakerd_features(pcmk__output_t *out, va_list args) { + out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, + BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("features") +static int +pacemakerd_features_xml(pcmk__output_t *out, va_list args) { + gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0); + + pcmk__output_xml_create_parent(out, "pacemakerd", + "version", PACEMAKER_VERSION, + "build", BUILD_VERSION, + "feature_set", CRM_FEATURE_SET, + NULL); + out->begin_list(out, NULL, NULL, "features"); + + for (char **s = feature_list; *s != NULL; s++) { + pcmk__output_create_xml_text_node(out, "feature", *s); + } + + out->end_list(out); + + g_strfreev(feature_list); + return pcmk_rc_ok; +} + +static pcmk__message_entry_t fmt_functions[] = { + { "features", "default", pacemakerd_features }, + { "features", "xml", pacemakerd_features_xml }, + + { NULL, NULL, NULL } +}; + +static gboolean +pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { + return TRUE; +} + +static gboolean +standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { + options.standby = TRUE; + pcmk__set_env_option("node_start_state", "standby"); + return TRUE; +} + +static GOptionEntry entries[] = { + { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features, + "Display full version and list of features Pacemaker was built with", + NULL }, + { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground, + "(Ignored) Pacemaker always runs in the foreground", + NULL }, + { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb, + "(Ignored) Daemon pid file location", + "FILE" }, + { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown, + "Instruct Pacemaker to shutdown on this machine", + NULL }, + { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb, + "Start node in standby state", + NULL }, + + 
{ NULL } +}; + +static void +pcmk_ignore(int nsig) +{ + crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); +} + +static void +pcmk_sigquit(int nsig) +{ + pcmk__panic(__func__); +} + +static void +mcp_chown(const char *path, uid_t uid, gid_t gid) +{ + int rc = chown(path, uid, gid); + + if (rc < 0) { + crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", + path, CRM_DAEMON_USER, gid, pcmk_rc_str(errno)); + } +} + +static void +create_pcmk_dirs(void) +{ + uid_t pcmk_uid = 0; + gid_t pcmk_gid = 0; + + const char *dirs[] = { + CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files + CRM_CORE_DIR, // core files + CRM_BLACKBOX_DIR, // blackbox dumps + PE_STATE_DIR, // scheduler inputs + CRM_CONFIG_DIR, // the Cluster Information Base (CIB) + // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it + NULL + }; + + if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) { + crm_err("Cluster user %s does not exist, aborting Pacemaker startup", + CRM_DAEMON_USER); + crm_exit(CRM_EX_NOUSER); + } + + // Used by some resource agents + if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) { + crm_warn("Could not create directory " CRM_STATE_DIR ": %s", + pcmk_rc_str(errno)); + } else { + mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); + } + + for (int i = 0; dirs[i] != NULL; ++i) { + int rc = pcmk__build_path(dirs[i], 0750); + + if (rc != pcmk_rc_ok) { + crm_warn("Could not create directory %s: %s", + dirs[i], pcmk_rc_str(rc)); + } else { + mcp_chown(dirs[i], pcmk_uid, pcmk_gid); + } + } +} + +static void +remove_core_file_limit(void) +{ + struct rlimit cores; + + // Get current limits + if (getrlimit(RLIMIT_CORE, &cores) < 0) { + crm_notice("Unable to check system core file limits " + "(consider ensuring the size is unlimited): %s", + strerror(errno)); + return; + } + + // Check whether core dumps are disabled + if (cores.rlim_max == 0) { + if (geteuid() != 0) { // Yes, and there's nothing we can do about it + crm_notice("Core dumps are disabled 
(consider enabling them)"); + return; + } + cores.rlim_max = RLIM_INFINITY; // Yes, but we're root, so enable them + } + + // Raise soft limit to hard limit (if not already done) + if (cores.rlim_cur != cores.rlim_max) { + cores.rlim_cur = cores.rlim_max; + if (setrlimit(RLIMIT_CORE, &cores) < 0) { + crm_notice("Unable to raise system limit on core file size " + "(consider doing so manually): %s", + strerror(errno)); + return; + } + } + + if (cores.rlim_cur == RLIM_INFINITY) { + crm_trace("Core file size is unlimited"); + } else { + crm_trace("Core file size is limited to %llu bytes", + (unsigned long long) cores.rlim_cur); + } +} + +static void +pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) +{ + pcmk_pacemakerd_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_reply: + break; + + default: + return; + } + + if (status != CRM_EX_OK) { + out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status)); + return; + } + + if (reply->reply_type != pcmk_pacemakerd_reply_shutdown) { + out->err(out, "Unknown reply type %d from pacemakerd", + reply->reply_type); + } +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_exit_t exit_code = CRM_EX_OK; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); + GOptionContext *context = build_arg_context(args, &output_group); + + bool old_instance_connected = false; + + pcmk_ipc_api_t *old_instance = NULL; + qb_ipcs_service_t *ipcs = NULL; + + subdaemon_check_progress = time(NULL); + + 
setenv("LC_ALL", "C", 1); // Ensure logs are in a common language + + crm_log_preinit(NULL, argc, argv); + mainloop_add_signal(SIGHUP, pcmk_ignore); + mainloop_add_signal(SIGQUIT, pcmk_sigquit); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + pcmk__force_args(context, &error, "%s --xml-simple-list", g_get_prgname()); + + pcmk__register_messages(out, fmt_functions); + + if (options.features) { + out->message(out, "features"); + exit_code = CRM_EX_OK; + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + pcmk__set_env_option("mcp", "true"); + + if (options.shutdown) { + pcmk__cli_init_logging("pacemakerd", args->verbosity); + } else { + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + } + + crm_debug("Checking for existing Pacemaker instance"); + + rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd); + if (old_instance == NULL) { + out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; + } + + pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL); + rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + old_instance_connected = pcmk_ipc_is_connected(old_instance); + + if (options.shutdown) { + if (old_instance_connected) { + rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name); + pcmk_dispatch_ipc(old_instance); + + exit_code = pcmk_rc2exitc(rc); + + if (exit_code != CRM_EX_OK) { + pcmk_free_ipc_api(old_instance); + goto done; + } + + /* We get the ACK immediately, and the response right after that, 
+ * but it might take a while for pacemakerd to get around to + * shutting down. Wait for that to happen (with 30-minute timeout). + */ + for (int i = 0; i < 900; i++) { + if (!pcmk_ipc_is_connected(old_instance)) { + exit_code = CRM_EX_OK; + pcmk_free_ipc_api(old_instance); + goto done; + } + + sleep(2); + } + + exit_code = CRM_EX_TIMEOUT; + pcmk_free_ipc_api(old_instance); + goto done; + + } else { + out->err(out, "Could not request shutdown " + "of existing Pacemaker instance: %s", pcmk_rc_str(rc)); + pcmk_free_ipc_api(old_instance); + exit_code = CRM_EX_DISCONNECT; + goto done; + } + + } else if (old_instance_connected) { + pcmk_free_ipc_api(old_instance); + crm_err("Aborting start-up because active Pacemaker instance found"); + exit_code = CRM_EX_FATAL; + goto done; + } + + pcmk_free_ipc_api(old_instance); + + /* Don't allow any accidental output after this point. */ + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + out = NULL; + } + +#ifdef SUPPORT_COROSYNC + if (mcp_read_config() == FALSE) { + crm_exit(CRM_EX_UNAVAILABLE); + } +#endif + + // OCF shell functions and cluster-glue need facility under different name + { + const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY); + + if (!pcmk__str_eq(facility, PCMK__VALUE_NONE, + pcmk__str_casei|pcmk__str_null_matches)) { + setenv("HA_LOGFACILITY", facility, 1); + } + } + + crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", + PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + mainloop = g_main_loop_new(NULL, FALSE); + + remove_core_file_limit(); + create_pcmk_dirs(); + pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks); + +#ifdef SUPPORT_COROSYNC + /* Allows us to block shutdown */ + if (!cluster_connect_cfg()) { + exit_code = CRM_EX_PROTOCOL; + goto done; + } +#endif + + if (pcmk__locate_sbd() > 0) { + setenv("PCMK_watchdog", "true", 1); + running_with_sbd = TRUE; + } else { + setenv("PCMK_watchdog", "false", 1); + } + + switch 
(find_and_track_existing_processes()) { + case pcmk_rc_ok: + break; + case pcmk_rc_ipc_unauthorized: + exit_code = CRM_EX_CANTCREAT; + goto done; + default: + exit_code = CRM_EX_FATAL; + goto done; + }; + + mainloop_add_signal(SIGTERM, pcmk_shutdown); + mainloop_add_signal(SIGINT, pcmk_shutdown); + + if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Waiting for startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; + startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); + } else { + if (running_with_sbd) { + crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " + "by your SBD version) improve reliability of " + "interworking between SBD & pacemaker."); + } + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + init_children_processes(NULL); + } + + crm_notice("Pacemaker daemon successfully started and accepting connections"); + g_main_loop_run(mainloop); + + if (ipcs) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + ipcs = NULL; + } + + g_main_loop_unref(mainloop); +#ifdef SUPPORT_COROSYNC + cluster_disconnect_cfg(); +#endif + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(exit_code); +} diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h new file mode 100644 index 0000000..b2a6864 --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.h @@ -0,0 +1,35 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <stdint.h> + +#define MAX_RESPAWN 100 + +extern GMainLoop *mainloop; +extern struct qb_ipcs_service_handlers mcp_ipc_callbacks; +extern const char *pacemakerd_state; +extern gboolean running_with_sbd; +extern unsigned int shutdown_complete_state_reported_to; +extern gboolean shutdown_complete_state_reported_client_closed; +extern crm_trigger_t *shutdown_trigger; +extern crm_trigger_t *startup_trigger; +extern time_t subdaemon_check_progress; + +gboolean mcp_read_config(void); + +gboolean cluster_connect_cfg(void); +void cluster_disconnect_cfg(void); +int find_and_track_existing_processes(void); +gboolean init_children_processes(void *user_data); +void restart_cluster_subdaemons(void); +void pcmk_shutdown(int nsig); +void pcmkd_shutdown_corosync(void); +bool pcmkd_corosync_connected(void); diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c new file mode 100644 index 0000000..2648756 --- /dev/null +++ b/daemons/pacemakerd/pcmkd_corosync.c @@ -0,0 +1,371 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> +#include "pacemakerd.h" + +#include <sys/utsname.h> +#include <sys/stat.h> /* for calls to stat() */ +#include <libgen.h> /* For basename() and dirname() */ + +#include <sys/types.h> +#include <pwd.h> /* For getpwname() */ + +#include <corosync/hdb.h> +#include <corosync/cfg.h> +#include <corosync/cpg.h> +#include <corosync/cmap.h> + +#include <crm/cluster/internal.h> +#include <crm/common/ipc.h> /* for crm_ipc_is_authentic_process */ +#include <crm/common/mainloop.h> + +#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */ + +static corosync_cfg_handle_t cfg_handle = 0; +static mainloop_timer_t *reconnect_timer = NULL; + +/* =::=::=::= CFG - Shutdown stuff =::=::=::= */ + +static void +cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) +{ + crm_info("Corosync wants to shut down: %s", + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional"); + + /* Never allow corosync to shut down while we're running */ + corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); +} + +static corosync_cfg_callbacks_t cfg_callbacks = { + .corosync_cfg_shutdown_callback = cfg_shutdown_callback, +}; + +static int +pcmk_cfg_dispatch(gpointer user_data) +{ + corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; + cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); + + if (rc != CS_OK) { + return -1; + } + return 0; +} + +static void +close_cfg(void) +{ + if (cfg_handle != 0) { +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a + * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully, + * it's not necessary since we exit immediately after this. 
+ */ +#endif + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + } +} + +static gboolean +cluster_reconnect_cb(gpointer data) +{ + if (cluster_connect_cfg()) { + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + crm_notice("Cluster reconnect succeeded"); + mcp_read_config(); + restart_cluster_subdaemons(); + return G_SOURCE_REMOVE; + } else { + crm_info("Cluster reconnect failed " + "(connection will be reattempted once per second)"); + } + /* + * In theory this will continue forever. In practice the CIB connection from + * attrd will timeout and shut down Pacemaker when it gets bored. + */ + return G_SOURCE_CONTINUE; +} + + +static void +cfg_connection_destroy(gpointer user_data) +{ + crm_warn("Lost connection to cluster layer " + "(connection will be reattempted once per second)"); + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL); + mainloop_timer_start(reconnect_timer); +} + +void +cluster_disconnect_cfg(void) +{ + close_cfg(); + if (reconnect_timer != NULL) { + /* The mainloop should be gone by this point, so this isn't necessary, + * but cleaning up memory should make valgrind happier. 
+ */ + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + } +} + +#define cs_repeat(counter, max, code) do { \ + code; \ + if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ + counter++; \ + crm_debug("Retrying Corosync operation after %ds", counter); \ + sleep(counter); \ + } else { \ + break; \ + } \ + } while(counter < max) + +gboolean +cluster_connect_cfg(void) +{ + cs_error_t rc; + int fd = -1, retries = 0, rv; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + uint32_t nodeid; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, + .destroy = cfg_connection_destroy, + }; + + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } + + /* CFG provider run as root (in given user namespace, anyway)? 
*/ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CFG provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + goto bail; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CFG provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); + if (rc != CS_OK) { + crm_crit("Could not get local node ID from Corosync: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + goto bail; + } + crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid); + +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0)); + if (rc != CS_OK) { + crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } +#endif + + mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); + return TRUE; + + bail: + corosync_cfg_finalize(cfg_handle); + return FALSE; +} + +void +pcmkd_shutdown_corosync(void) +{ + cs_error_t rc; + + if (cfg_handle == 0) { + crm_warn("Unable to shut down Corosync: No connection"); + return; + } + crm_info("Asking Corosync to shut down"); + rc = corosync_cfg_try_shutdown(cfg_handle, + COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); + if (rc == CS_OK) { + close_cfg(); + } else { + crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + } +} + +bool +pcmkd_corosync_connected(void) +{ + cpg_handle_t local_handle = 0; + cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0}; + int fd = -1; + + if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) { + return false; + } + + if 
(cpg_fd_get(local_handle, &fd) != CS_OK) { + return false; + } + + cpg_finalize(local_handle); + + return true; +} + +/* =::=::=::= Configuration =::=::=::= */ +static int +get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, + const char *fallback) +{ + int rc = 0, retries = 0; + + cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); + if (rc != CS_OK) { + crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); + pcmk__str_update(value, fallback); + } + crm_trace("%s: %s", key, *value); + return rc; +} + +gboolean +mcp_read_config(void) +{ + cs_error_t rc = CS_OK; + int retries = 0; + cmap_handle_t local_handle; + uint64_t config = 0; + int fd = -1; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + int rv; + enum cluster_type_e stack; + + // There can be only one possibility + do { + rc = pcmk__init_cmap(&local_handle); + if (rc != CS_OK) { + retries++; + crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) " + CRM_XS " rc=%d", cs_strerror(rc), retries, rc); + sleep(retries); + + } else { + break; + } + + } while (retries < 5); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CMAP: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + return FALSE; + } + + rc = cmap_fd_get(local_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + cmap_finalize(local_handle); + return FALSE; + } + + /* CMAP provider run as root (in given user namespace, anyway)? 
*/ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CMAP provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + cmap_finalize(local_handle); + return FALSE; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CMAP provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + cmap_finalize(local_handle); + return FALSE; + } + + stack = get_cluster_type(); + if (stack != pcmk_cluster_corosync) { + crm_crit("Expected Corosync cluster layer but detected %s " + CRM_XS " stack=%d", name_for_cluster_type(stack), stack); + return FALSE; + } + + crm_info("Reading configuration for %s stack", + name_for_cluster_type(stack)); + pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync"); + pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync"); + + // If debug logging is not configured, check whether corosync has it + if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) { + char *debug_enabled = NULL; + + get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off"); + + if (crm_is_true(debug_enabled)) { + pcmk__set_env_option(PCMK__ENV_DEBUG, "1"); + if (get_crm_log_level() < LOG_DEBUG) { + set_crm_log_level(LOG_DEBUG); + } + + } else { + pcmk__set_env_option(PCMK__ENV_DEBUG, "0"); + } + + free(debug_enabled); + } + + if(local_handle){ + gid_t gid = 0; + if (pcmk_daemon_user(NULL, &gid) < 0) { + crm_warn("Could not authorize group with Corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); + + } else { + char key[PATH_MAX]; + snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); + rc = cmap_set_uint8(local_handle, key, 1); + if (rc != CS_OK) { + crm_warn("Could not authorize group with Corosync: %s " CRM_XS + " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc); + } + } + } + cmap_finalize(local_handle); + + return TRUE; +} diff --git 
a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c new file mode 100644 index 0000000..7ed9899 --- /dev/null +++ b/daemons/pacemakerd/pcmkd_messages.c @@ -0,0 +1,278 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include "pacemakerd.h" + +#include <crm/crm.h> +#include <crm/msg_xml.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <time.h> +#include <sys/types.h> + +static GHashTable *pcmkd_handlers = NULL; + +static xmlNode * +handle_node_cache_request(pcmk__request_t *request) +{ + crm_trace("Ignoring request from client %s to purge node " + "because peer cache is not used", + pcmk__client_name(request->ipc_client)); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_OK); + return NULL; +} + +static xmlNode * +handle_ping_request(pcmk__request_t *request) +{ + xmlNode *msg = request->xml; + + const char *value = NULL; + xmlNode *ping = NULL; + xmlNode *reply = NULL; + const char *from = crm_element_value(msg, F_CRM_SYS_FROM); + + /* Pinged for status */ + crm_trace("Pinged from " F_CRM_SYS_FROM "='%s' " F_CRM_ORIGIN "='%s'", + pcmk__s(from, ""), + pcmk__s(crm_element_value(msg, F_CRM_ORIGIN), "")); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state); + crm_xml_add_ll(ping, XML_ATTR_TSTAMP, + (long long) subdaemon_check_progress); + crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + reply = 
create_reply(msg, ping); + + free_xml(ping); + + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Failed building ping reply for client %s", + pcmk__client_name(request->ipc_client)); + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + /* just proceed state on sbd pinging us */ + if (from && strstr(from, "sbd")) { + if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) { + if (pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Shutdown-complete-state passed to SBD."); + } + + shutdown_complete_state_reported_to = request->ipc_client->pid; + + } else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) { + crm_notice("Received startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + mainloop_set_trigger(startup_trigger); + } + } + + return reply; +} + +static xmlNode * +handle_shutdown_request(pcmk__request_t *request) +{ + xmlNode *msg = request->xml; + + xmlNode *shutdown = NULL; + xmlNode *reply = NULL; + + /* Only allow privileged users (i.e. root or hacluster) to shut down + * Pacemaker from the command line (or direct IPC), so that other users + * are forced to go through the CIB and have ACLs applied. 
+ */ + bool allowed = pcmk_is_set(request->ipc_client->flags, pcmk__client_privileged); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + shutdown = create_xml_node(NULL, XML_CIB_ATTR_SHUTDOWN); + + if (allowed) { + crm_notice("Shutting down in response to IPC request %s from %s", + crm_element_value(msg, F_CRM_REFERENCE), + crm_element_value(msg, F_CRM_ORIGIN)); + crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_OK); + } else { + crm_warn("Ignoring shutdown request from unprivileged client %s", + pcmk__client_name(request->ipc_client)); + crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_INSUFFICIENT_PRIV); + } + + reply = create_reply(msg, shutdown); + free_xml(shutdown); + + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Failed building shutdown reply for client %s", + pcmk__client_name(request->ipc_client)); + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + if (allowed) { + pcmk_shutdown(15); + } + + return reply; +} + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INVALID_PARAM); + + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown IPC request type '%s' (bug?)", + pcmk__client_name(request->ipc_client)); + return NULL; +} + +static void +pcmkd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { CRM_OP_RM_NODE_CACHE, handle_node_cache_request }, + { CRM_OP_PING, handle_ping_request }, + { CRM_OP_QUIT, handle_shutdown_request }, + { NULL, handle_unknown_request }, + }; + + pcmkd_handlers = pcmk__register_handlers(handlers); +} + +static int32_t +pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + crm_trace("Connection %p", c); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + 
return 0; +} + +/* Error code means? */ +static int32_t +pcmk_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + crm_trace("Connection %p", c); + if (shutdown_complete_state_reported_to == client->pid) { + shutdown_complete_state_reported_client_closed = TRUE; + if (shutdown_trigger) { + mainloop_set_trigger(shutdown_trigger); + } + } + pcmk__free_client(client); + return 0; +} + +static void +pcmk_ipc_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p", c); + pcmk_ipc_closed(c); +} + +/* Exit code means? */ +static int32_t +pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + xmlNode *msg = NULL; + pcmk__client_t *c = pcmk__find_client(qbc); + + CRM_CHECK(c != NULL, return 0); + + if (pcmkd_handlers == NULL) { + pcmkd_register_handlers(); + } + + msg = pcmk__client_data2xml(c, data, &id, &flags); + if (msg == NULL) { + pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + + } else { + char *log_msg = NULL; + const char *reason = NULL; + xmlNode *reply = NULL; + + pcmk__request_t request = { + .ipc_client = c, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = msg, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, F_CRM_TASK); + CRM_CHECK(request.op != NULL, return 0); + + reply = pcmk__process_request(&request, pcmkd_handlers); + + if (reply != NULL) { + pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); + free_xml(reply); + } + + reason = request.result.exit_reason; + + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request.op, pcmk__request_origin_type(&request), + pcmk__request_origin(&request), + pcmk_exec_status_str(request.result.execution_status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? 
"" : ")"); + + if (!pcmk__result_ok(&request.result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(&request); + } + + free_xml(msg); + return 0; +} + +struct qb_ipcs_service_handlers mcp_ipc_callbacks = { + .connection_accept = pcmk_ipc_accept, + .connection_created = NULL, + .msg_process = pcmk_ipc_dispatch, + .connection_closed = pcmk_ipc_closed, + .connection_destroyed = pcmk_ipc_destroy +}; diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c new file mode 100644 index 0000000..3b08ecc --- /dev/null +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -0,0 +1,888 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include "pacemakerd.h" + +#include <errno.h> +#include <grp.h> +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include <crm/cluster.h> +#include <crm/msg_xml.h> + +typedef struct pcmk_child_s { + pid_t pid; + int respawn_count; + bool respawn; + const char *name; + const char *uid; + const char *command; + const char *endpoint; /* IPC server name */ + bool needs_cluster; + int check_count; + + /* Anything below here will be dynamically initialized */ + bool needs_retry; + bool active_before_startup; +} pcmk_child_t; + +#define PCMK_PROCESS_CHECK_INTERVAL 1 +#define PCMK_PROCESS_CHECK_RETRIES 5 +#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ + +/* Index into the array below */ +#define PCMK_CHILD_CONTROLD 5 + +static pcmk_child_t pcmk_children[] = { + { + 0, 0, true, "pacemaker-based", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO, + true + }, + { + 0, 0, true, 
"pacemaker-fenced", NULL, + CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng", + true + }, + { + 0, 0, true, "pacemaker-execd", NULL, + CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD, + false + }, + { + 0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD, + true + }, + { + 0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE, + false + }, + { + 0, 0, true, "pacemaker-controld", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD, + true + }, +}; + +static char *opts_default[] = { NULL, NULL }; +static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; + +crm_trigger_t *shutdown_trigger = NULL; +crm_trigger_t *startup_trigger = NULL; +time_t subdaemon_check_progress = 0; + +// Whether we need root group access to talk to cluster layer +static bool need_root_group = true; + +/* When contacted via pacemakerd-api by a client having sbd in + * the name we assume it is sbd-daemon which wants to know + * if pacemakerd shutdown gracefully. + * Thus when everything is shutdown properly pacemakerd + * waits till it has reported the graceful completion of + * shutdown to sbd and just when sbd-client closes the + * connection we can assume that the report has arrived + * properly so that pacemakerd can finally exit. + * Following two variables are used to track that handshake. 
+ */ +unsigned int shutdown_complete_state_reported_to = 0; +gboolean shutdown_complete_state_reported_client_closed = FALSE; + +/* state we report when asked via pacemakerd-api status-ping */ +const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; +gboolean running_with_sbd = FALSE; /* local copy */ + +GMainLoop *mainloop = NULL; + +static gboolean fatal_error = FALSE; + +static int child_liveness(pcmk_child_t *child); +static gboolean escalate_shutdown(gpointer data); +static int start_child(pcmk_child_t * child); +static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); +static void pcmk_process_exit(pcmk_child_t * child); +static gboolean pcmk_shutdown_worker(gpointer user_data); +static gboolean stop_child(pcmk_child_t * child, int signal); + +static bool +pcmkd_cluster_connected(void) +{ +#if SUPPORT_COROSYNC + return pcmkd_corosync_connected(); +#else + return true; +#endif +} + +static gboolean +check_next_subdaemon(gpointer user_data) +{ + static int next_child = 0; + int rc = child_liveness(&pcmk_children[next_child]); + + crm_trace("Checked %s[%lld]: %s (%d)", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid), + pcmk_rc_str(rc), rc); + + switch (rc) { + case pcmk_rc_ok: + pcmk_children[next_child].check_count = 0; + subdaemon_check_progress = time(NULL); + break; + case pcmk_rc_ipc_pid_only: // This case: it was previously OK + pcmk_children[next_child].check_count++; + if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) { + crm_err("%s[%lld] is unresponsive to ipc after %d tries but " + "we found the pid so have it killed that we can restart", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid), + pcmk_children[next_child].check_count); + stop_child(&pcmk_children[next_child], SIGKILL); + if (pcmk_children[next_child].respawn) { + /* as long as the respawn-limit isn't reached + 
give it another round of check retries + */ + pcmk_children[next_child].check_count = 0; + } + } else { + crm_notice("%s[%lld] is unresponsive to ipc after %d tries", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid), + pcmk_children[next_child].check_count); + if (pcmk_children[next_child].respawn) { + /* as long as the respawn-limit isn't reached + and we haven't run out of connect retries + we account this as progress we are willing + to tell to sbd + */ + subdaemon_check_progress = time(NULL); + } + } + /* go to the next child and see if + we can make progress there + */ + break; + case pcmk_rc_ipc_unresponsive: + if (!pcmk_children[next_child].respawn) { + /* if a subdaemon is down and we don't want it + to be restarted this is a success during + shutdown. if it isn't restarted anymore + due to MAX_RESPAWN it is + rather no success. + */ + if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) { + subdaemon_check_progress = time(NULL); + } + } + if (!pcmk_children[next_child].active_before_startup) { + crm_trace("found %s[%lld] missing - signal-handler " + "will take care of it", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + break; + } + if (pcmk_children[next_child].respawn) { + crm_err("%s[%lld] terminated", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + } else { + /* orderly shutdown */ + crm_notice("%s[%lld] terminated", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + } + pcmk_process_exit(&(pcmk_children[next_child])); + break; + default: + crm_exit(CRM_EX_FATAL); + break; /* static analysis/noreturn */ + } + + next_child++; + if (next_child >= PCMK__NELEM(pcmk_children)) { + next_child = 0; + } + + return G_SOURCE_CONTINUE; +} + +static gboolean +escalate_shutdown(gpointer data) +{ + pcmk_child_t *child = data; + 
+ if (child->pid == PCMK__SPECIAL_PID) { + pcmk_process_exit(child); + + } else if (child->pid != 0) { + /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ + crm_err("Child %s not terminating in a timely manner, forcing", child->name); + stop_child(child, SIGSEGV); + } + return FALSE; +} + +static void +pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) +{ + pcmk_child_t *child = mainloop_child_userdata(p); + const char *name = mainloop_child_name(p); + + if (signo) { + do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), + "%s[%d] terminated with signal %d (%s)%s", + name, pid, signo, strsignal(signo), + (core? " and dumped core" : "")); + + } else { + switch(exitcode) { + case CRM_EX_OK: + crm_info("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; + + case CRM_EX_FATAL: + crm_warn("Shutting cluster down because %s[%d] had fatal failure", + name, pid); + child->respawn = false; + fatal_error = TRUE; + pcmk_shutdown(SIGTERM); + break; + + case CRM_EX_PANIC: + crm_emerg("%s[%d] instructed the machine to reset", name, pid); + child->respawn = false; + fatal_error = TRUE; + pcmk__panic(__func__); + pcmk_shutdown(SIGTERM); + break; + + default: + crm_err("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; + } + } + + pcmk_process_exit(child); +} + +static void +pcmk_process_exit(pcmk_child_t * child) +{ + child->pid = 0; + child->active_before_startup = false; + child->check_count = 0; + + child->respawn_count += 1; + if (child->respawn_count > MAX_RESPAWN) { + crm_err("Child respawn count exceeded by %s", child->name); + child->respawn = false; + } + + if (shutdown_trigger) { + /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ + mainloop_set_trigger(shutdown_trigger); + + } else if (!child->respawn) { + /* nothing to do */ + + } else if (crm_is_true(getenv("PCMK_fail_fast"))) { + 
crm_err("Rebooting system because of %s", child->name); + pcmk__panic(__func__); + + } else if (child_liveness(child) == pcmk_rc_ok) { + crm_warn("One-off suppressing strict respawning of a child process %s," + " appears alright per %s IPC end-point", + child->name, child->endpoint); + + } else if (child->needs_cluster && !pcmkd_cluster_connected()) { + crm_notice("Not respawning %s subdaemon until cluster returns", + child->name); + child->needs_retry = true; + + } else { + crm_notice("Respawning %s subdaemon after unexpected exit", + child->name); + start_child(child); + } +} + +static gboolean +pcmk_shutdown_worker(gpointer user_data) +{ + static int phase = PCMK__NELEM(pcmk_children) - 1; + static time_t next_log = 0; + + if (phase == PCMK__NELEM(pcmk_children) - 1) { + crm_notice("Shutting down Pacemaker"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; + } + + for (; phase >= 0; phase--) { + pcmk_child_t *child = &(pcmk_children[phase]); + + if (child->pid != 0) { + time_t now = time(NULL); + + if (child->respawn) { + if (child->pid == PCMK__SPECIAL_PID) { + crm_warn("The process behind %s IPC cannot be" + " terminated, so either wait the graceful" + " period of %ld s for its native termination" + " if it vitally depends on some other daemons" + " going down in a controlled way already," + " or locate and kill the correct %s process" + " on your own; set PCMK_fail_fast=1 to avoid" + " this altogether next time around", + child->name, (long) SHUTDOWN_ESCALATION_PERIOD, + child->command); + } + next_log = now + 30; + child->respawn = false; + stop_child(child, SIGTERM); + if (phase < PCMK_CHILD_CONTROLD) { + g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, + escalate_shutdown, child); + } + + } else if (now >= next_log) { + next_log = now + 30; + crm_notice("Still waiting for %s to terminate " + CRM_XS " pid=%lld", + child->name, (long long) child->pid); + } + return TRUE; + } + + /* cleanup */ + crm_debug("%s confirmed stopped", child->name); + 
child->pid = 0; + } + + crm_notice("Shutdown complete"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; + if (!fatal_error && running_with_sbd && + pcmk__get_sbd_sync_resource_startup() && + !shutdown_complete_state_reported_client_closed) { + crm_notice("Waiting for SBD to pick up shutdown-complete-state."); + return TRUE; + } + + { + const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY); + if(delay) { + sync(); + pcmk__sleep_ms(crm_get_msec(delay)); + } + } + + g_main_loop_quit(mainloop); + + if (fatal_error) { + crm_notice("Shutting down and staying down after fatal error"); +#ifdef SUPPORT_COROSYNC + pcmkd_shutdown_corosync(); +#endif + crm_exit(CRM_EX_FATAL); + } + + return TRUE; +} + +/* TODO once libqb is taught to juggle with IPC end-points carried over as + bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) + it shall hand over these descriptors here if/once they are successfully + pre-opened in (presumably) child_liveness(), to avoid any remaining + room for races */ + // \return Standard Pacemaker return code +static int +start_child(pcmk_child_t * child) +{ + uid_t uid = 0; + gid_t gid = 0; + gboolean use_valgrind = FALSE; + gboolean use_callgrind = FALSE; + const char *env_valgrind = getenv("PCMK_valgrind_enabled"); + const char *env_callgrind = getenv("PCMK_callgrind_enabled"); + + child->active_before_startup = false; + child->check_count = 0; + + if (child->command == NULL) { + crm_info("Nothing to do for child \"%s\"", child->name); + return pcmk_rc_ok; + } + + if (env_callgrind != NULL && crm_is_true(env_callgrind)) { + use_callgrind = TRUE; + use_valgrind = TRUE; + + } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { + use_callgrind = TRUE; + use_valgrind = TRUE; + + } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { + use_valgrind = TRUE; + + } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { + use_valgrind = TRUE; + } + + if (use_valgrind 
&& strlen(VALGRIND_BIN) == 0) { + crm_warn("Cannot enable valgrind for %s:" + " The location of the valgrind binary is unknown", child->name); + use_valgrind = FALSE; + } + + if (child->uid) { + if (crm_user_lookup(child->uid, &uid, &gid) < 0) { + crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); + return EACCES; + } + crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); + } + + child->pid = fork(); + CRM_ASSERT(child->pid != -1); + + if (child->pid > 0) { + /* parent */ + mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); + + crm_info("Forked child %lld for process %s%s", + (long long) child->pid, child->name, + use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); + return pcmk_rc_ok; + + } else { + /* Start a new session */ + (void)setsid(); + + /* Setup the two alternate arg arrays */ + opts_vgrind[0] = strdup(VALGRIND_BIN); + if (use_callgrind) { + opts_vgrind[1] = strdup("--tool=callgrind"); + opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); + opts_vgrind[3] = strdup(child->command); + opts_vgrind[4] = NULL; + } else { + opts_vgrind[1] = strdup(child->command); + opts_vgrind[2] = NULL; + opts_vgrind[3] = NULL; + opts_vgrind[4] = NULL; + } + opts_default[0] = strdup(child->command); + + if(gid) { + // Drop root group access if not needed + if (!need_root_group && (setgid(gid) < 0)) { + crm_warn("Could not set group to %d: %s", gid, strerror(errno)); + } + + /* Initialize supplementary groups to only those always granted to + * the user, plus haclient (so we can access IPC). 
+ */ + if (initgroups(child->uid, gid) < 0) { + crm_err("Cannot initialize groups for %s: %s (%d)", + child->uid, pcmk_rc_str(errno), errno); + } + } + + if (uid && setuid(uid) < 0) { + crm_warn("Could not set user to %s (id %d): %s", + child->uid, uid, strerror(errno)); + } + + pcmk__close_fds_in_child(true); + + pcmk__open_devnull(O_RDONLY); // stdin (fd 0) + pcmk__open_devnull(O_WRONLY); // stdout (fd 1) + pcmk__open_devnull(O_WRONLY); // stderr (fd 2) + + if (use_valgrind) { + (void)execvp(VALGRIND_BIN, opts_vgrind); + } else { + (void)execvp(child->command, opts_default); + } + crm_crit("Could not execute %s: %s", child->command, strerror(errno)); + crm_exit(CRM_EX_FATAL); + } + return pcmk_rc_ok; /* never reached */ +} + +/*! + * \internal + * \brief Check the liveness of the child based on IPC name and PID if tracked + * + * \param[in,out] child Child tracked data + * + * \return Standard Pacemaker return code + * + * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive + * indicating that no trace of IPC liveness was detected, + * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by + * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that + * the child is up by PID but not IPC end-point (possibly starting). + * \note This function doesn't modify any of \p child members but \c pid, + * and is not actively toying with processes as such but invoking + * \c stop_child in one particular case (there's for some reason + * a different authentic holder of the IPC end-point). 
+ */ +static int +child_liveness(pcmk_child_t *child) +{ + uid_t cl_uid = 0; + gid_t cl_gid = 0; + const uid_t root_uid = 0; + const gid_t root_gid = 0; + const uid_t *ref_uid; + const gid_t *ref_gid; + int rc = pcmk_rc_ipc_unresponsive; + pid_t ipc_pid = 0; + + if (child->endpoint == NULL + && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { + crm_err("Cannot track child %s for missing both API end-point and PID", + child->name); + rc = EINVAL; // Misuse of function when child is not trackable + + } else if (child->endpoint != NULL) { + int legacy_rc = pcmk_ok; + + if (child->uid == NULL) { + ref_uid = &root_uid; + ref_gid = &root_gid; + } else { + ref_uid = &cl_uid; + ref_gid = &cl_gid; + legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); + } + + if (legacy_rc < 0) { + rc = pcmk_legacy2rc(legacy_rc); + crm_err("Could not find user and group IDs for user %s: %s " + CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); + } else { + rc = pcmk__ipc_is_authentic_process_active(child->endpoint, + *ref_uid, *ref_gid, + &ipc_pid); + if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { + if (child->pid <= 0) { + /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this + * initializes a new child. If rc is + * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will + * investigate further. + */ + child->pid = ipc_pid; + } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { + /* An unexpected (but authorized) process is responding to + * IPC. Investigate further. + */ + rc = pcmk_rc_ipc_unresponsive; + } + } + } + } + + if (rc == pcmk_rc_ipc_unresponsive) { + /* If we get here, a child without IPC is being tracked, no IPC liveness + * has been detected, or IPC liveness has been detected with an + * unexpected (but authorized) process. This is safe on FreeBSD since + * the only change possible from a proper child's PID into "special" PID + * of 1 behind more loosely related process. 
+ */ + int ret = pcmk__pid_active(child->pid, child->name); + + if (ipc_pid && ((ret != pcmk_rc_ok) + || ipc_pid == PCMK__SPECIAL_PID + || (pcmk__pid_active(ipc_pid, + child->name) == pcmk_rc_ok))) { + /* An unexpected (but authorized) process was detected at the IPC + * endpoint, and either it is active, or the child we're tracking is + * not. + */ + + if (ret == pcmk_rc_ok) { + /* The child we're tracking is active. Kill it, and adopt the + * detected process. This assumes that our children don't fork + * (thus getting a different PID owning the IPC), but rather the + * tracking got out of sync because of some means external to + * Pacemaker, and adopting the detected process is better than + * killing it and possibly having to spawn a new child. + */ + /* not possessing IPC, afterall (what about corosync CPG?) */ + stop_child(child, SIGKILL); + } + rc = pcmk_rc_ok; + child->pid = ipc_pid; + } else if (ret == pcmk_rc_ok) { + // Our tracked child's PID was found active, but not its IPC + rc = pcmk_rc_ipc_pid_only; + } else if ((child->pid == 0) && (ret == EINVAL)) { + // FreeBSD can return EINVAL + rc = pcmk_rc_ipc_unresponsive; + } else { + switch (ret) { + case EACCES: + rc = pcmk_rc_ipc_unauthorized; + break; + case ESRCH: + rc = pcmk_rc_ipc_unresponsive; + break; + default: + rc = ret; + break; + } + } + } + return rc; +} + +/*! + * \internal + * \brief Initial one-off check of the pre-existing "child" processes + * + * With "child" process, we mean the subdaemon that defines an API end-point + * (all of them do as of the comment) -- the possible complement is skipped + * as it is deemed it has no such shared resources to cause conflicts about, + * hence it can presumably be started anew without hesitation. + * If that won't hold true in the future, the concept of a shared resource + * will have to be generalized beyond the API end-point. 
+ * + * For boundary cases that the "child" is still starting (IPC end-point is yet + * to be witnessed), or more rarely (practically FreeBSD only), when there's + * a pre-existing "untrackable" authentic process, we give the situation some + * time to possibly unfold in the right direction, meaning that said socket + * will appear or the unattainable process will disappear per the observable + * IPC, respectively. + * + * \return Standard Pacemaker return code + * + * \note Since this gets run at the very start, \c respawn_count fields + * for particular children get temporarily overloaded with "rounds + * of waiting" tracking, restored once we are about to finish with + * success (i.e. returning value >=0) and will remain unrestored + * otherwise. One way to suppress liveness detection logic for + * particular child is to set the said value to a negative number. + */ +#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ +int +find_and_track_existing_processes(void) +{ + bool wait_in_progress; + int rc; + size_t i, rounds; + + for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { + wait_in_progress = false; + for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + + if ((pcmk_children[i].endpoint == NULL) + || (pcmk_children[i].respawn_count < 0)) { + continue; + } + + rc = child_liveness(&pcmk_children[i]); + if (rc == pcmk_rc_ipc_unresponsive) { + /* As a speculation, don't give up if there are more rounds to + * come for other reasons, but don't artificially wait just + * because of this, since we would preferably start ASAP. 
+ */ + continue; + } + + pcmk_children[i].respawn_count = rounds; + switch (rc) { + case pcmk_rc_ok: + if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { + if (crm_is_true(getenv("PCMK_fail_fast"))) { + crm_crit("Cannot reliably track pre-existing" + " authentic process behind %s IPC on this" + " platform and PCMK_fail_fast requested", + pcmk_children[i].endpoint); + return EOPNOTSUPP; + } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { + crm_notice("Assuming pre-existing authentic, though" + " on this platform untrackable, process" + " behind %s IPC is stable (was in %d" + " previous samples) so rather than" + " bailing out (PCMK_fail_fast not" + " requested), we just switch to a less" + " optimal IPC liveness monitoring" + " (not very suitable for heavy load)", + pcmk_children[i].name, WAIT_TRIES - 1); + crm_warn("The process behind %s IPC cannot be" + " terminated, so the overall shutdown" + " will get delayed implicitly (%ld s)," + " which serves as a graceful period for" + " its native termination if it vitally" + " depends on some other daemons going" + " down in a controlled way already", + pcmk_children[i].name, + (long) SHUTDOWN_ESCALATION_PERIOD); + } else { + wait_in_progress = true; + crm_warn("Cannot reliably track pre-existing" + " authentic process behind %s IPC on this" + " platform, can still disappear in %d" + " attempt(s)", pcmk_children[i].endpoint, + WAIT_TRIES - pcmk_children[i].respawn_count); + continue; + } + } + crm_notice("Tracking existing %s process (pid=%lld)", + pcmk_children[i].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid)); + pcmk_children[i].respawn_count = -1; /* 0~keep watching */ + pcmk_children[i].active_before_startup = true; + break; + case pcmk_rc_ipc_pid_only: + if (pcmk_children[i].respawn_count == WAIT_TRIES) { + crm_crit("%s IPC end-point for existing authentic" + " process %lld did not (re)appear", + pcmk_children[i].endpoint, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid)); + 
return rc; + } + wait_in_progress = true; + crm_warn("Cannot find %s IPC end-point for existing" + " authentic process %lld, can still (re)appear" + " in %d attempts (?)", + pcmk_children[i].endpoint, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid), + WAIT_TRIES - pcmk_children[i].respawn_count); + continue; + default: + crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d", + pcmk_children[i].name, pcmk_rc_str(rc), rc); + return rc; + } + } + if (!wait_in_progress) { + break; + } + pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen + } + for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + pcmk_children[i].respawn_count = 0; /* restore pristine state */ + } + + g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, + NULL); + return pcmk_rc_ok; +} +/* + * Start every entry of pcmk_children[] whose pid is still 0; entries with a + * nonzero pid are skipped as already tracked. Afterwards export + * PCMK_respawned=true into the environment and set pacemakerd_state to + * XML_PING_ATTR_PACEMAKERDSTATE_RUNNING. + * + * When is_corosync_cluster() is true, need_root_group is cleared first. + * + * user_data is not referenced in this function. + * The value returned by start_child() is not checked in this loop. + * + * Returns TRUE on every path. + */ +gboolean +init_children_processes(void *user_data) +{ + if (is_corosync_cluster()) { + /* Corosync clusters can drop root group access, because we set + * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect + * to corosync. + */ + need_root_group = false; + } + + /* start any children that have not been detected */ + for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { + if (pcmk_children[i].pid != 0) { + /* we are already tracking it */ + continue; + } + + start_child(&(pcmk_children[i])); + } + + /* From this point on, any daemons being started will be due to + * respawning rather than node start. 
+ * + * This may be useful for the daemons to know + */ + setenv("PCMK_respawned", "true", 1); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; + return TRUE; +} +/* + * Request a shutdown: on first use, create shutdown_trigger with + * mainloop_add_trigger() (priority G_PRIORITY_HIGH, callback + * pcmk_shutdown_worker, no user data), then pass it to + * mainloop_set_trigger(). Later calls reuse the same trigger. + * + * nsig is not referenced in this function. + */ +void +pcmk_shutdown(int nsig) +{ + if (shutdown_trigger == NULL) { + shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); + } + mainloop_set_trigger(shutdown_trigger); +} +/* + * Call start_child() for every entry of pcmk_children[] that has needs_retry + * set and no pid recorded (pid == 0). needs_retry is cleared only when + * start_child() returns a true value; otherwise the flag stays set. + */ +void +restart_cluster_subdaemons(void) +{ + for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { + if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) { + continue; + } + + crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name); + if (start_child(&pcmk_children[i])) { + pcmk_children[i].needs_retry = false; + } + } +} +/* + * Send a signal to a subdaemon process. + * + * child: entry whose pid is to be signalled + * signal: signal number; 0 is replaced by SIGTERM + * + * No signal is sent when child->command is NULL, when child->pid equals + * PCMK__SPECIAL_PID, or when child->pid is <= 0. A failing kill() is only + * logged together with strerror(errno); it is not reported to the caller. + * + * Returns TRUE on every path. + */ +static gboolean +stop_child(pcmk_child_t * child, int signal) +{ + if (signal == 0) { + signal = SIGTERM; + } + + /* why to skip PID of 1? + - FreeBSD ~ how untrackable process behind IPC is masqueraded as + - elsewhere: how "init" task is designated; in particular, in systemd + arrangement of socket-based activation, this is pretty real */ + if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { + crm_debug("Nothing to do for child \"%s\" (process %lld)", + child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + return TRUE; + } + + if (child->pid <= 0) { + crm_trace("Client %s not running", child->name); + return TRUE; + } + + errno = 0; + if (kill(child->pid, signal) == 0) { + crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld", + child->name, signal, (long long) child->pid); + + } else { + crm_err("Could not stop %s (process %lld) with signal %d: %s", + child->name, (long long) child->pid, signal, strerror(errno)); + } + + return TRUE; +} + |