Diffstat (limited to 'daemons')
-rw-r--r--  daemons/Makefile.am | 11
-rw-r--r--  daemons/attrd/Makefile.am | 48
-rw-r--r--  daemons/attrd/attrd_alerts.c | 145
-rw-r--r--  daemons/attrd/attrd_attributes.c | 188
-rw-r--r--  daemons/attrd/attrd_cib.c | 380
-rw-r--r--  daemons/attrd/attrd_corosync.c | 620
-rw-r--r--  daemons/attrd/attrd_elections.c | 179
-rw-r--r--  daemons/attrd/attrd_ipc.c | 628
-rw-r--r--  daemons/attrd/attrd_messages.c | 328
-rw-r--r--  daemons/attrd/attrd_sync.c | 577
-rw-r--r--  daemons/attrd/attrd_utils.c | 362
-rw-r--r--  daemons/attrd/pacemaker-attrd.c | 358
-rw-r--r--  daemons/attrd/pacemaker-attrd.h | 216
-rw-r--r--  daemons/based/Makefile.am | 47
-rw-r--r--  daemons/based/based_callbacks.c | 1696
-rw-r--r--  daemons/based/based_common.c | 352
-rw-r--r--  daemons/based/based_io.c | 473
-rw-r--r--  daemons/based/based_messages.c | 427
-rw-r--r--  daemons/based/based_notify.c | 305
-rw-r--r--  daemons/based/based_remote.c | 680
-rw-r--r--  daemons/based/cib.pam | 6
-rw-r--r--  daemons/based/pacemaker-based.c | 442
-rw-r--r--  daemons/based/pacemaker-based.h | 150
-rw-r--r--  daemons/controld/Makefile.am | 87
-rw-r--r--  daemons/controld/controld_alerts.c | 88
-rw-r--r--  daemons/controld/controld_alerts.h | 22
-rw-r--r--  daemons/controld/controld_attrd.c | 160
-rw-r--r--  daemons/controld/controld_callbacks.c | 367
-rw-r--r--  daemons/controld/controld_callbacks.h | 21
-rw-r--r--  daemons/controld/controld_cib.c | 1138
-rw-r--r--  daemons/controld/controld_cib.h | 125
-rw-r--r--  daemons/controld/controld_control.c | 857
-rw-r--r--  daemons/controld/controld_corosync.c | 164
-rw-r--r--  daemons/controld/controld_election.c | 292
-rw-r--r--  daemons/controld/controld_execd.c | 2433
-rw-r--r--  daemons/controld/controld_execd_state.c | 814
-rw-r--r--  daemons/controld/controld_fencing.c | 1108
-rw-r--r--  daemons/controld/controld_fencing.h | 38
-rw-r--r--  daemons/controld/controld_fsa.c | 741
-rw-r--r--  daemons/controld/controld_fsa.h | 694
-rw-r--r--  daemons/controld/controld_globals.h | 143
-rw-r--r--  daemons/controld/controld_join_client.c | 366
-rw-r--r--  daemons/controld/controld_join_dc.c | 987
-rw-r--r--  daemons/controld/controld_lrm.h | 188
-rw-r--r--  daemons/controld/controld_matrix.c | 1250
-rw-r--r--  daemons/controld/controld_membership.c | 457
-rw-r--r--  daemons/controld/controld_membership.h | 29
-rw-r--r--  daemons/controld/controld_messages.c | 1307
-rw-r--r--  daemons/controld/controld_messages.h | 86
-rw-r--r--  daemons/controld/controld_metadata.c | 320
-rw-r--r--  daemons/controld/controld_metadata.h | 96
-rw-r--r--  daemons/controld/controld_remote_ra.c | 1440
-rw-r--r--  daemons/controld/controld_schedulerd.c | 506
-rw-r--r--  daemons/controld/controld_te_actions.c | 746
-rw-r--r--  daemons/controld/controld_te_callbacks.c | 689
-rw-r--r--  daemons/controld/controld_te_events.c | 601
-rw-r--r--  daemons/controld/controld_te_utils.c | 367
-rw-r--r--  daemons/controld/controld_throttle.c | 574
-rw-r--r--  daemons/controld/controld_throttle.h | 16
-rw-r--r--  daemons/controld/controld_timers.c | 509
-rw-r--r--  daemons/controld/controld_timers.h | 36
-rw-r--r--  daemons/controld/controld_transition.c | 197
-rw-r--r--  daemons/controld/controld_transition.h | 63
-rw-r--r--  daemons/controld/controld_utils.c | 837
-rw-r--r--  daemons/controld/controld_utils.h | 61
-rw-r--r--  daemons/controld/pacemaker-controld.c | 205
-rw-r--r--  daemons/controld/pacemaker-controld.h | 39
-rw-r--r--  daemons/execd/Makefile.am | 76
-rw-r--r--  daemons/execd/cts-exec-helper.c | 624
-rw-r--r--  daemons/execd/execd_alerts.c | 205
-rw-r--r--  daemons/execd/execd_commands.c | 1927
-rw-r--r--  daemons/execd/pacemaker-execd.c | 582
-rw-r--r--  daemons/execd/pacemaker-execd.h | 110
-rw-r--r--  daemons/execd/pacemaker-remoted.8.inc | 5
-rw-r--r--  daemons/execd/pacemaker_remote.in | 176
-rw-r--r--  daemons/execd/pacemaker_remote.service.in | 52
-rw-r--r--  daemons/execd/remoted_pidone.c | 298
-rw-r--r--  daemons/execd/remoted_proxy.c | 470
-rw-r--r--  daemons/execd/remoted_tls.c | 428
-rw-r--r--  daemons/fenced/Makefile.am | 52
-rw-r--r--  daemons/fenced/cts-fence-helper.c | 681
-rw-r--r--  daemons/fenced/fenced_commands.c | 3674
-rw-r--r--  daemons/fenced/fenced_history.c | 548
-rw-r--r--  daemons/fenced/fenced_remote.c | 2509
-rw-r--r--  daemons/fenced/pacemaker-fenced.c | 1751
-rw-r--r--  daemons/fenced/pacemaker-fenced.h | 315
-rw-r--r--  daemons/pacemakerd/Makefile.am | 37
-rw-r--r--  daemons/pacemakerd/pacemaker.combined.upstart.in | 67
-rw-r--r--  daemons/pacemakerd/pacemaker.service.in | 103
-rw-r--r--  daemons/pacemakerd/pacemaker.upstart.in | 33
-rw-r--r--  daemons/pacemakerd/pacemakerd.8.inc | 5
-rw-r--r--  daemons/pacemakerd/pacemakerd.c | 483
-rw-r--r--  daemons/pacemakerd/pacemakerd.h | 35
-rw-r--r--  daemons/pacemakerd/pcmkd_corosync.c | 371
-rw-r--r--  daemons/pacemakerd/pcmkd_messages.c | 278
-rw-r--r--  daemons/pacemakerd/pcmkd_subdaemons.c | 888
-rw-r--r--  daemons/schedulerd/Makefile.am | 53
-rw-r--r--  daemons/schedulerd/pacemaker-schedulerd.c | 181
-rw-r--r--  daemons/schedulerd/pacemaker-schedulerd.h | 20
-rw-r--r--  daemons/schedulerd/schedulerd_messages.c | 335
100 files changed, 47254 insertions, 0 deletions
diff --git a/daemons/Makefile.am b/daemons/Makefile.am
new file mode 100644
index 0000000..743320b
--- /dev/null
+++ b/daemons/Makefile.am
@@ -0,0 +1,11 @@
+#
+# Copyright 2018 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+MAINTAINERCLEANFILES = Makefile.in
+SUBDIRS = based schedulerd attrd controld execd fenced pacemakerd
diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am
new file mode 100644
index 0000000..6bb81c4
--- /dev/null
+++ b/daemons/attrd/Makefile.am
@@ -0,0 +1,48 @@
+#
+# Copyright 2004-2022 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-attrd
+
+noinst_HEADERS = pacemaker-attrd.h
+
+pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(top_builddir)/lib/pengine/libpe_rules.la \
+ $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(CLUSTERLIBS)
+
+pacemaker_attrd_SOURCES = attrd_alerts.c \
+ attrd_attributes.c \
+ attrd_cib.c \
+ attrd_corosync.c \
+ attrd_elections.c \
+ attrd_ipc.c \
+ attrd_messages.c \
+ attrd_sync.c \
+ attrd_utils.c \
+ pacemaker-attrd.c
+
+clean-generic:
+ rm -f *.log *.debug *.xml *~
+
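+# Pacemaker 2.0 renamed this daemon from attrd to pacemaker-attrd; these hooks
+# optionally install a compatibility symlink under the old name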
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd && $(LN_S) pacemaker-attrd attrd
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd
+endif
diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c
new file mode 100644
index 0000000..b694891
--- /dev/null
+++ b/daemons/attrd/attrd_alerts.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2015-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/common/alerts_internal.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/lrmd_internal.h>
+#include "pacemaker-attrd.h"
+
+static GList *attrd_alert_list = NULL;
+
+static void
+attrd_lrmd_callback(lrmd_event_data_t * op)
+{
+ CRM_CHECK(op != NULL, return);
+ switch (op->type) {
+ case lrmd_event_disconnect:
+ crm_info("Lost connection to executor");
+ attrd_lrmd_disconnect();
+ break;
+ default:
+ break;
+ }
+}
+
+static lrmd_t *
+attrd_lrmd_connect(void)
+{
+ if (the_lrmd == NULL) {
+ the_lrmd = lrmd_api_new();
+ the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback);
+ }
+
+ if (!the_lrmd->cmds->is_connected(the_lrmd)) {
+ const unsigned int max_attempts = 10;
+ int ret = -ENOTCONN;
+
+ for (int fails = 0; fails < max_attempts; ++fails) {
+ ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL);
+ if (ret == pcmk_ok) {
+ break;
+ }
+
+ crm_debug("Could not connect to executor, %d tries remaining",
+ (max_attempts - fails));
+ /* @TODO We don't want to block here with sleep, but we should wait
+ * some time between connection attempts. We could possibly add a
+ * timer with a callback, but then we'd likely need an alert queue.
+ */
+ }
+
+ if (ret != pcmk_ok) {
+ attrd_lrmd_disconnect();
+ }
+ }
+
+ return the_lrmd;
+}
+
+void
+attrd_lrmd_disconnect(void) {
+ if (the_lrmd) {
+ lrmd_t *conn = the_lrmd;
+
+ the_lrmd = NULL; /* in case we're called recursively */
+ lrmd_api_delete(conn); /* will disconnect if necessary */
+ }
+}
+
+static void
+config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ xmlNode *crmalerts = NULL;
+
+ if (rc == -ENXIO) {
+ crm_debug("Local CIB has no alerts section");
+ return;
+ } else if (rc != pcmk_ok) {
+ crm_notice("Could not query local CIB: %s", pcmk_strerror(rc));
+ return;
+ }
+
+ crmalerts = output;
+ if (crmalerts && !pcmk__str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, pcmk__str_none)) {
+ crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS);
+ }
+ if (!crmalerts) {
+ crm_notice("CIB query result has no " XML_CIB_TAG_ALERTS " section");
+ return;
+ }
+
+ pe_free_alert_list(attrd_alert_list);
+ attrd_alert_list = pe_unpack_alerts(crmalerts);
+}
+
+#define XPATH_ALERTS \
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS
+
+gboolean
+attrd_read_options(gpointer user_data)
+{
+ int call_id;
+
+ CRM_CHECK(the_cib != NULL, return TRUE);
+
+ call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL,
+ cib_xpath | cib_scope_local);
+
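+    // Handle the query result asynchronously (with a 120-second timeout)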
+ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL,
+ "config_query_callback",
+ config_query_callback, free);
+
+ crm_trace("Querying the CIB... call %d", call_id);
+ return TRUE;
+}
+
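+/* CIB change notification callback: trigger a re-read of the alert
+ * configuration whenever a CIB update touches the alerts section
+ */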
+void
+attrd_cib_updated_cb(const char *event, xmlNode * msg)
+{
+ if (!attrd_shutting_down() && pcmk__alert_in_patchset(msg, false)) {
+ mainloop_set_trigger(attrd_config_read);
+ }
+}
+
+int
+attrd_send_attribute_alert(const char *node, int nodeid,
+ const char *attr, const char *value)
+{
+ if (attrd_alert_list == NULL) {
+ return pcmk_ok;
+ }
+ return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list,
+ node, nodeid, attr, value);
+}
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
new file mode 100644
index 0000000..516ced7
--- /dev/null
+++ b/daemons/attrd/attrd_attributes.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2013-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <glib.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/logging.h>
+#include <crm/common/results.h>
+#include <crm/common/strings_internal.h>
+#include <crm/common/xml.h>
+
+#include "pacemaker-attrd.h"
+
+static attribute_t *
+attrd_create_attribute(xmlNode *xml)
+{
+ int dampen = 0;
+ const char *value = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
+ attribute_t *a = calloc(1, sizeof(attribute_t));
+
+ CRM_ASSERT(a != NULL);
+
+ a->id = crm_element_value_copy(xml, PCMK__XA_ATTR_NAME);
+ a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET);
+ a->set_type = crm_element_value_copy(xml, PCMK__XA_ATTR_SET_TYPE);
+ a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID);
+ a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &a->is_private);
+
+ a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
+ crm_trace("Performing all %s operations as user '%s'", a->id, a->user);
+
+ if (value != NULL) {
+ dampen = crm_get_msec(value);
+ }
+ crm_trace("Created attribute %s with %s write delay", a->id,
+ (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
+
+ if(dampen > 0) {
+ a->timeout_ms = dampen;
+ a->timer = attrd_add_timer(a->id, a->timeout_ms, a);
+ } else if (dampen < 0) {
+ crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id);
+ }
+
+ g_hash_table_replace(attributes, a->id, a);
+ return a;
+}
+
+static int
+attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr)
+{
+ const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
+ int dampen = 0;
+
+ if (dvalue == NULL) {
+ crm_warn("Could not update %s: peer did not specify value for delay",
+ attr);
+ return EINVAL;
+ }
+
+ dampen = crm_get_msec(dvalue);
+ if (dampen < 0) {
+ crm_warn("Could not update %s: invalid delay value %dms (%s)",
+ attr, dampen, dvalue);
+ return EINVAL;
+ }
+
+ if (a->timeout_ms != dampen) {
+ mainloop_timer_del(a->timer);
+ a->timeout_ms = dampen;
+ if (dampen > 0) {
+ a->timer = attrd_add_timer(attr, a->timeout_ms, a);
+ crm_info("Update attribute %s delay to %dms (%s)",
+ attr, dampen, dvalue);
+ } else {
+ a->timer = NULL;
+ crm_info("Update attribute %s to remove delay", attr);
+ }
+
+ /* If dampening changed, do an immediate write-out,
+ * otherwise repeated dampening changes would prevent write-outs
+ */
+ attrd_write_or_elect_attribute(a);
+ }
+
+ return pcmk_rc_ok;
+}
+
+GHashTable *attributes = NULL;
+
+/*!
+ * \internal
+ * \brief Create an XML representation of an attribute for use in peer messages
+ *
+ * \param[in,out] parent Create attribute XML as child element of this
+ * \param[in] a Attribute to represent
+ * \param[in] v Attribute value to represent
+ * \param[in] force_write If true, value should be written even if unchanged
+ *
+ * \return XML representation of attribute
+ */
+xmlNode *
+attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
+ const attribute_value_t *v, bool force_write)
+{
+ xmlNode *xml = create_xml_node(parent, __func__);
+
+ crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
+ crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
+ crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
+ crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
+ pcmk__xe_add_node(xml, v->nodename, v->nodeid);
+ if (v->is_remote != 0) {
+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
+ }
+ crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
+ crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
+ crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);
+
+ return xml;
+}
+
+void
+attrd_clear_value_seen(void)
+{
+ GHashTableIter aIter;
+ GHashTableIter vIter;
+ attribute_t *a;
+ attribute_value_t *v = NULL;
+
+ g_hash_table_iter_init(&aIter, attributes);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
+ g_hash_table_iter_init(&vIter, a->values);
+ while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
+ v->seen = FALSE;
+ crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current);
+ }
+ }
+}
+
+attribute_t *
+attrd_populate_attribute(xmlNode *xml, const char *attr)
+{
+ attribute_t *a = NULL;
+ bool update_both = false;
+
+ const char *op = crm_element_value(xml, PCMK__XA_TASK);
+
+ // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK__XA_TASK
+ update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH,
+ pcmk__str_null_matches);
+
+ // Look up or create attribute entry
+ a = g_hash_table_lookup(attributes, attr);
+ if (a == NULL) {
+ if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) {
+ a = attrd_create_attribute(xml);
+ } else {
+ crm_warn("Could not update %s: attribute not found", attr);
+ return NULL;
+ }
+ }
+
+ // Update attribute dampening
+ if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
+ int rc = attrd_update_dampening(a, xml, attr);
+
+ if (rc != pcmk_rc_ok || !update_both) {
+ return NULL;
+ }
+ }
+
+ return a;
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
new file mode 100644
index 0000000..928c013
--- /dev/null
+++ b/daemons/attrd/attrd_cib.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <glib.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/logging.h>
+#include <crm/common/results.h>
+#include <crm/common/strings_internal.h>
+#include <crm/common/xml.h>
+
+#include "pacemaker-attrd.h"
+
+static int last_cib_op_done = 0;
+
+static gboolean
+attribute_timer_cb(gpointer data)
+{
+ attribute_t *a = data;
+ crm_trace("Dampen interval expired for %s", a->id);
+ attrd_write_or_elect_attribute(a);
+ return FALSE;
+}
+
+static void
+attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data)
+{
+ int level = LOG_ERR;
+ GHashTableIter iter;
+ const char *peer = NULL;
+ attribute_value_t *v = NULL;
+
+ char *name = user_data;
+ attribute_t *a = g_hash_table_lookup(attributes, name);
+
+ if(a == NULL) {
+ crm_info("Attribute %s no longer exists", name);
+ return;
+ }
+
+ a->update = 0;
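+    /* If the CIB API could not submit the request at all, it reports the
+     * error as a negative call ID
+     */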
+ if (rc == pcmk_ok && call_id < 0) {
+ rc = call_id;
+ }
+
+ switch (rc) {
+ case pcmk_ok:
+ level = LOG_INFO;
+ last_cib_op_done = call_id;
+ if (a->timer && !a->timeout_ms) {
+ // Remove temporary dampening for failed writes
+ mainloop_timer_del(a->timer);
+ a->timer = NULL;
+ }
+ break;
+
+ case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
+ case -ETIME: /* When an attr changes while there is a DC election */
+ case -ENXIO: /* When an attr changes while the CIB is syncing a
+ * newer config from a node that just came up
+ */
+ level = LOG_WARNING;
+ break;
+ }
+
+ do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d",
+ call_id, a->id, pcmk_strerror(rc), rc);
+
+ g_hash_table_iter_init(&iter, a->values);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
+ do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested);
+ free(v->requested);
+ v->requested = NULL;
+ if (rc != pcmk_ok) {
+ a->changed = true; /* Attempt write out again */
+ }
+ }
+
+ if (a->changed && attrd_election_won()) {
+ if (rc == pcmk_ok) {
+ /* We deferred a write of a new update because this update was in
+ * progress. Write out the new value without additional delay.
+ */
+ attrd_write_attribute(a, false);
+
+ /* We're re-attempting a write because the original failed; delay
+ * the next attempt so we don't potentially flood the CIB manager
+ * and logs with a zillion attempts per second.
+ *
+ * @TODO We could elect a new writer instead. However, we'd have to
+ * somehow downgrade our vote, and we'd still need something like this
+ * if all peers similarly fail to write this attribute (which may
+ * indicate a corrupted attribute entry rather than a CIB issue).
+ */
+ } else if (a->timer) {
+ // Attribute has a dampening value, so use that as delay
+ if (!mainloop_timer_running(a->timer)) {
+ crm_trace("Delayed re-attempted write for %s by %s",
+ name, pcmk__readable_interval(a->timeout_ms));
+ mainloop_timer_start(a->timer);
+ }
+ } else {
+ /* Set a temporary dampening of 2 seconds (timer will continue
+ * to exist until the attribute's dampening gets set or the
+ * write succeeds).
+ */
+ a->timer = attrd_add_timer(a->id, 2000, a);
+ mainloop_timer_start(a->timer);
+ }
+ }
+}
+
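+/* Build a CIB status update for one node's value of one attribute. With the
+ * default set type and generated IDs, the result looks roughly like:
+ *
+ *   <node_state id="NODEID">
+ *     <transient_attributes id="NODEID">
+ *       <instance_attributes id="status-NODEID">
+ *         <nvpair id="status-NODEID-NAME" name="NAME" value="VALUE"/>
+ *       </instance_attributes>
+ *     </transient_attributes>
+ *   </node_state>
+ */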
+static void
+build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value)
+{
+ const char *set = NULL;
+ xmlNode *xml_obj = NULL;
+
+ xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE);
+ crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
+
+ xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS);
+ crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
+
+ if (pcmk__str_eq(a->set_type, XML_TAG_ATTR_SETS, pcmk__str_null_matches)) {
+ xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS);
+ } else if (pcmk__str_eq(a->set_type, XML_TAG_UTILIZATION, pcmk__str_none)) {
+ xml_obj = create_xml_node(xml_obj, XML_TAG_UTILIZATION);
+ } else {
+ crm_err("Unknown set type attribute: %s", a->set_type);
+ }
+
+ if (a->set_id) {
+ crm_xml_set_id(xml_obj, "%s", a->set_id);
+ } else {
+ crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid);
+ }
+ set = ID(xml_obj);
+
+ xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR);
+ if (a->uuid) {
+ crm_xml_set_id(xml_obj, "%s", a->uuid);
+ } else {
+ crm_xml_set_id(xml_obj, "%s-%s", set, a->id);
+ }
+ crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id);
+
+ if(value) {
+ crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value);
+
+ } else {
+ crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, "");
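+        /* The "__delete__" marker asks the CIB manager to remove the named
+         * attribute ("value") instead of setting it to the empty string; this
+         * relies on the cib_mixed_update call option set in
+         * attrd_write_attribute()
+         */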
+ crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE);
+ }
+}
+
+static void
+send_alert_attributes_value(attribute_t *a, GHashTable *t)
+{
+ int rc = 0;
+ attribute_value_t *at = NULL;
+ GHashTableIter vIter;
+
+ g_hash_table_iter_init(&vIter, t);
+
+ while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
+ rc = attrd_send_attribute_alert(at->nodename, at->nodeid,
+ a->id, at->current);
+ crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d",
+ a->id, at->nodename, at->current, at->nodeid, rc);
+ }
+}
+
+static void
+set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
+{
+ attribute_value_t *a_v = NULL;
+ a_v = calloc(1, sizeof(attribute_value_t));
+ CRM_ASSERT(a_v != NULL);
+
+ a_v->nodeid = v->nodeid;
+ a_v->nodename = strdup(v->nodename);
+ pcmk__str_update(&a_v->current, v->current);
+
+ g_hash_table_replace(t, a_v->nodename, a_v);
+}
+
+mainloop_timer_t *
+attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr)
+{
+ return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr);
+}
+
+void
+attrd_write_attribute(attribute_t *a, bool ignore_delay)
+{
+ int private_updates = 0, cib_updates = 0;
+ xmlNode *xml_top = NULL;
+ attribute_value_t *v = NULL;
+ GHashTableIter iter;
+ enum cib_call_options flags = cib_none;
+ GHashTable *alert_attribute_value = NULL;
+
+ if (a == NULL) {
+ return;
+ }
+
+ /* If this attribute will be written to the CIB ... */
+ if (!stand_alone && !a->is_private) {
+
+ /* Defer the write if now's not a good time */
+ CRM_CHECK(the_cib != NULL, return);
+ if (a->update && (a->update < last_cib_op_done)) {
+ crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update);
+ a->update = 0; // Don't log this message again
+
+ } else if (a->update) {
+ crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update);
+ return;
+
+ } else if (mainloop_timer_running(a->timer)) {
+ if (ignore_delay) {
+            /* A refresh forces a write of the current value of all
+             * attributes. Cancel any existing timer; we're writing NOW.
+             */
+ mainloop_timer_stop(a->timer);
+ crm_debug("Write out of '%s': timer is running but ignore delay", a->id);
+ } else {
+ crm_info("Write out of '%s' delayed: timer is running", a->id);
+ return;
+ }
+ }
+
+ /* Initialize the status update XML */
+ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ }
+
+ /* Attribute will be written shortly, so clear changed flag */
+ a->changed = false;
+
+ /* We will check all peers' uuids shortly, so initialize this to false */
+ a->unknown_peer_uuids = false;
+
+ /* Attribute will be written shortly, so clear forced write flag */
+ a->force_write = FALSE;
+
+ /* Make the table for the attribute trap */
+ alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value);
+
+ /* Iterate over each peer value of this attribute */
+ g_hash_table_iter_init(&iter, a->values);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) {
+ crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY);
+
+ /* If the value's peer info does not correspond to a peer, ignore it */
+ if (peer == NULL) {
+ crm_notice("Cannot update %s[%s]=%s because peer not known",
+ a->id, v->nodename, v->current);
+ continue;
+ }
+
+ /* If we're just learning the peer's node id, remember it */
+ if (peer->id && (v->nodeid == 0)) {
+ crm_trace("Learned ID %u for node %s", peer->id, v->nodename);
+ v->nodeid = peer->id;
+ }
+
+ /* If this is a private attribute, no update needs to be sent */
+ if (stand_alone || a->is_private) {
+ private_updates++;
+ continue;
+ }
+
+ /* If the peer is found, but its uuid is unknown, defer write */
+ if (peer->uuid == NULL) {
+ a->unknown_peer_uuids = true;
+ crm_notice("Cannot update %s[%s]=%s because peer UUID not known "
+ "(will retry if learned)",
+ a->id, v->nodename, v->current);
+ continue;
+ }
+
+ /* Add this value to status update XML */
+ crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID %u/%u)",
+ a->id, v->nodename, v->current,
+ peer->uname, peer->uuid, peer->id, v->nodeid);
+ build_update_element(xml_top, a, peer->uuid, v->current);
+ cib_updates++;
+
+ /* Preservation of the attribute to transmit alert */
+ set_alert_attribute_value(alert_attribute_value, v);
+
+ free(v->requested);
+ v->requested = NULL;
+ if (v->current) {
+ v->requested = strdup(v->current);
+ } else {
+ /* Older attrd versions don't know about the cib_mixed_update
+ * flag so make sure it goes to the local cib which does
+ */
+ cib__set_call_options(flags, crm_system_name,
+ cib_mixed_update|cib_scope_local);
+ }
+ }
+
+ if (private_updates) {
+ crm_info("Processed %d private change%s for %s, id=%s, set=%s",
+ private_updates, pcmk__plural_s(private_updates),
+ a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a"));
+ }
+ if (cib_updates) {
+ crm_log_xml_trace(xml_top, __func__);
+
+ a->update = cib_internal_op(the_cib, PCMK__CIB_REQUEST_MODIFY, NULL,
+ XML_CIB_TAG_STATUS, xml_top, NULL, flags,
+ a->user);
+
+ crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)",
+ a->update, cib_updates, pcmk__plural_s(cib_updates),
+ a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a"));
+
+ the_cib->cmds->register_callback_full(the_cib, a->update,
+ CIB_OP_TIMEOUT_S, FALSE,
+ strdup(a->id),
+ "attrd_cib_callback",
+ attrd_cib_callback, free);
+ /* Transmit alert of the attribute */
+ send_alert_attributes_value(a, alert_attribute_value);
+ }
+
+ g_hash_table_destroy(alert_attribute_value);
+ free_xml(xml_top);
+}
+
+void
+attrd_write_attributes(bool all, bool ignore_delay)
+{
+ GHashTableIter iter;
+ attribute_t *a = NULL;
+
+ crm_debug("Writing out %s attributes", all? "all" : "changed");
+ g_hash_table_iter_init(&iter, attributes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
+ if (!all && a->unknown_peer_uuids) {
+ // Try writing this attribute again, in case peer ID was learned
+ a->changed = true;
+ } else if (a->force_write) {
+ /* If the force_write flag is set, write the attribute. */
+ a->changed = true;
+ }
+
+ if(all || a->changed) {
+ /* When forced write flag is set, ignore delay. */
+ attrd_write_attribute(a, (a->force_write ? true : ignore_delay));
+ } else {
+ crm_trace("Skipping unchanged attribute %s", a->id);
+ }
+ }
+}
+
+void
+attrd_write_or_elect_attribute(attribute_t *a)
+{
+ if (attrd_election_won()) {
+ attrd_write_attribute(a, false);
+ } else {
+ attrd_start_election_if_needed();
+ }
+}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
new file mode 100644
index 0000000..ef205e6
--- /dev/null
+++ b/daemons/attrd/attrd_corosync.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <crm/cluster.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/logging.h>
+#include <crm/common/results.h>
+#include <crm/common/strings_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-attrd.h"
+
+extern crm_exit_t attrd_exit_status;
+
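+/* Build a PCMK__ATTRD_CMD_CONFIRM message that echoes the given call ID back
+ * to the peer that requested confirmation
+ */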
+static xmlNode *
+attrd_confirmation(int callid)
+{
+ xmlNode *node = create_xml_node(NULL, __func__);
+
+ crm_xml_add(node, F_TYPE, T_ATTRD);
+ crm_xml_add(node, F_ORIG, get_local_node_name());
+ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
+ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid);
+
+ return node;
+}
+
+static void
+attrd_peer_message(crm_node_t *peer, xmlNode *xml)
+{
+ const char *election_op = crm_element_value(xml, F_CRM_TASK);
+
+ if (election_op) {
+ attrd_handle_election_op(peer, xml);
+ return;
+ }
+
+ if (attrd_shutting_down()) {
+ /* If we're shutting down, we want to continue responding to election
+ * ops as long as we're a cluster member (because our vote may be
+ * needed). Ignore all other messages.
+ */
+ return;
+
+ } else {
+ pcmk__request_t request = {
+ .ipc_client = NULL,
+ .ipc_id = 0,
+ .ipc_flags = 0,
+ .peer = peer->uname,
+ .xml = xml,
+ .call_options = 0,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
+
+ request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK);
+ CRM_CHECK(request.op != NULL, return);
+
+ attrd_handle_request(&request);
+
+ /* Having finished handling the request, check to see if the originating
+ * peer requested confirmation. If so, send that confirmation back now.
+ */
+ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
+ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
+ int callid = 0;
+ xmlNode *reply = NULL;
+
+ /* Add the confirmation ID for the message we are confirming to the
+ * response so the originating peer knows what they're a confirmation
+ * for.
+ */
+ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid);
+ reply = attrd_confirmation(callid);
+
+ /* And then send the confirmation back to the originating peer. This
+ * ends up right back in this same function (attrd_peer_message) on the
+ * peer where it will have to do something with a PCMK__XA_CONFIRM type
+ * message.
+ */
+ crm_debug("Sending %s a confirmation", peer->uname);
+ attrd_send_message(peer, reply, false);
+ free_xml(reply);
+ }
+
+ pcmk__reset_request(&request);
+ }
+}
+
+static void
+attrd_cpg_dispatch(cpg_handle_t handle,
+ const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ xmlNode *xml = NULL;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+ if(data == NULL) {
+ return;
+ }
+
+ if (kind == crm_class_cluster) {
+ xml = string2xml(data);
+ }
+
+ if (xml == NULL) {
+ crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
+ } else {
+ crm_node_t *peer = crm_get_peer(nodeid, from);
+
+ attrd_peer_message(peer, xml);
+ }
+
+ free_xml(xml);
+ free(data);
+}
+
+static void
+attrd_cpg_destroy(gpointer unused)
+{
+ if (attrd_shutting_down()) {
+ crm_info("Corosync disconnection complete");
+
+ } else {
+ crm_crit("Lost connection to cluster layer, shutting down");
+ attrd_exit_status = CRM_EX_DISCONNECT;
+ attrd_shutdown(0);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Override an attribute sync with a local value
+ *
+ * Broadcast the local node's value for an attribute that's different from the
+ * value provided in a peer's attribute synchronization response. This ensures a
+ * node's values for itself take precedence and all peers are kept in sync.
+ *
+ * \param[in] a Attribute entry to override
+ *
+ * \return Local instance of attribute value
+ */
+static attribute_value_t *
+broadcast_local_value(const attribute_t *a)
+{
+ attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
+ xmlNode *sync = create_xml_node(NULL, __func__);
+
+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+ attrd_add_value_xml(sync, a, v, false);
+ attrd_send_message(NULL, sync, false);
+ free_xml(sync);
+ return v;
+}
+
+/*!
+ * \internal
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
+ *
+ * \param[in] node_name Name of Pacemaker Remote node to check
+ */
+static void
+cache_remote_node(const char *node_name)
+{
+ /* If we previously assumed this node was an unseen cluster node,
+ * remove its entry from the cluster peer cache.
+ */
+ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name);
+
+ if (dup && (dup->uuid == NULL)) {
+ reap_crm_member(0, node_name);
+ }
+
+ // Ensure node is in the remote peer cache
+ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
+}
+
+#define state_text(state) pcmk__s((state), "in unknown state")
+
+/*!
+ * \internal
+ * \brief Return host's hash table entry (creating one if needed)
+ *
+ * \param[in,out] values Hash table of values
+ * \param[in] host Name of peer to look up
+ * \param[in] xml XML describing the attribute
+ *
+ * \return Pointer to new or existing hash table entry
+ */
+static attribute_value_t *
+attrd_lookup_or_create_value(GHashTable *values, const char *host,
+ const xmlNode *xml)
+{
+ attribute_value_t *v = g_hash_table_lookup(values, host);
+ int is_remote = 0;
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
+ if (is_remote) {
+ cache_remote_node(host);
+ }
+
+ if (v == NULL) {
+ v = calloc(1, sizeof(attribute_value_t));
+ CRM_ASSERT(v != NULL);
+
+ pcmk__str_update(&v->nodename, host);
+ v->is_remote = is_remote;
+ g_hash_table_replace(values, v->nodename, v);
+ }
+    return v;
+}
+
+static void
+attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
+{
+ bool gone = false;
+ bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
+
+ switch (kind) {
+ case crm_status_uname:
+ crm_debug("%s node %s is now %s",
+ (is_remote? "Remote" : "Cluster"),
+ peer->uname, state_text(peer->state));
+ break;
+
+ case crm_status_processes:
+ if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
+ gone = true;
+ }
+ crm_debug("Node %s is %s a peer",
+ peer->uname, (gone? "no longer" : "now"));
+ break;
+
+ case crm_status_nstate:
+ crm_debug("%s node %s is now %s (was %s)",
+ (is_remote? "Remote" : "Cluster"),
+ peer->uname, state_text(peer->state), state_text(data));
+ if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
+ /* If we're the writer, send new peers a list of all attributes
+ * (unless it's a remote node, which doesn't run its own attrd)
+ */
+ if (attrd_election_won()
+ && !pcmk_is_set(peer->flags, crm_remote_node)) {
+ attrd_peer_sync(peer, NULL);
+ }
+ } else {
+ // Remove all attribute values associated with lost nodes
+ attrd_peer_remove(peer->uname, false, "loss");
+ gone = true;
+ }
+ break;
+ }
+
+ // Remove votes from cluster nodes that leave, in case election in progress
+ if (gone && !is_remote) {
+ attrd_remove_voter(peer);
+ attrd_remove_peer_protocol_ver(peer->uname);
+ attrd_do_not_expect_from_peer(peer->uname);
+
+ // Ensure remote nodes that come up are in the remote node cache
+ } else if (!gone && is_remote) {
+ cache_remote_node(peer->uname);
+ }
+}
+
+static void
+record_peer_nodeid(attribute_value_t *v, const char *host)
+{
+ crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
+
+ crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
+ if (attrd_election_won()) {
+ attrd_write_attributes(false, false);
+ }
+}
+
+static void
+update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
+ const char *attr, const char *value, const char *host,
+ bool filter, int is_force_write)
+{
+ attribute_value_t *v = NULL;
+
+ v = attrd_lookup_or_create_value(a->values, host, xml);
+
+ if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
+ && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
+
+ crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
+ attr, host, v->current, value, peer->uname);
+ v = broadcast_local_value(a);
+
+ } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
+ crm_notice("Setting %s[%s]%s%s: %s -> %s "
+ CRM_XS " from %s with %s write delay",
+ attr, host, a->set_type ? " in " : "",
+ pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"),
+ pcmk__s(value, "(unset)"), peer->uname,
+ (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
+ pcmk__str_update(&v->current, value);
+ a->changed = true;
+
+ if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
+ && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) {
+
+ if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
+ attrd_set_requesting_shutdown();
+
+ } else {
+ attrd_clear_requesting_shutdown();
+ }
+ }
+
+ // Write out new value or start dampening timer
+ if (a->timeout_ms && a->timer) {
+ crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr);
+ mainloop_timer_start(a->timer);
+ } else {
+ attrd_write_or_elect_attribute(a);
+ }
+
+ } else {
+ if (is_force_write == 1 && a->timeout_ms && a->timer) {
+            /* Remember that a forced write was requested; the writer will
+             * perform the actual write after the election.
+             */
+            crm_trace("Unchanged %s[%s] from %s is %s (setting forced write flag)",
+                      attr, host, peer->uname, value);
+ a->force_write = TRUE;
+ } else {
+ crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
+ }
+ }
+
+    /* Mark the value as seen, so sync responses can detect values that exist
+     * only on the local node
+     */
+ v->seen = TRUE;
+
+ /* If this is a cluster node whose node ID we are learning, remember it */
+ if ((v->nodeid == 0) && (v->is_remote == FALSE)
+ && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
+ (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
+ record_peer_nodeid(v, host);
+ }
+}
+
+static void
+attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
+{
+ attribute_t *a = NULL;
+ const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
+ const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
+ const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
+ int is_force_write = 0;
+
+ if (attr == NULL) {
+ crm_warn("Could not update attribute: peer did not specify name");
+ return;
+ }
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
+
+ a = attrd_populate_attribute(xml, attr);
+ if (a == NULL) {
+ return;
+ }
+
+ if (host == NULL) {
+ // If no host was specified, update all hosts
+ GHashTableIter vIter;
+
+ crm_debug("Setting %s for all hosts to %s", attr, value);
+ xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID);
+ g_hash_table_iter_init(&vIter, a->values);
+
+ while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
+ update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ }
+
+ } else {
+ // Update attribute value for the given host
+ update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ }
+
+ /* If this is a message from some attrd instance broadcasting its protocol
+ * version, check to see if it's a new minimum version.
+ */
+ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
+ attrd_update_minimum_protocol_ver(peer->uname, value);
+ }
+}
+
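+/* Broadcast any local values that the writer's sync response did not include
+ * (i.e. whose "seen" flag is still cleared), so the writer learns about them
+ */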
+static void
+broadcast_unseen_local_values(void)
+{
+ GHashTableIter aIter;
+ GHashTableIter vIter;
+ attribute_t *a = NULL;
+ attribute_value_t *v = NULL;
+ xmlNode *sync = NULL;
+
+ g_hash_table_iter_init(&aIter, attributes);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
+ g_hash_table_iter_init(&vIter, a->values);
+ while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
+ if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
+ pcmk__str_casei)) {
+ if (sync == NULL) {
+ sync = create_xml_node(NULL, __func__);
+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+ }
+ attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
+ }
+ }
+ }
+
+ if (sync != NULL) {
+ crm_debug("Broadcasting local-only values");
+ attrd_send_message(NULL, sync, false);
+ free_xml(sync);
+ }
+}
+
+int
+attrd_cluster_connect(void)
+{
+ attrd_cluster = pcmk_cluster_new();
+
+ attrd_cluster->destroy = attrd_cpg_destroy;
+ attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch;
+ attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+
+ crm_set_status_callback(&attrd_peer_change_cb);
+
+ if (crm_cluster_connect(attrd_cluster) == FALSE) {
+ crm_err("Cluster connection failed");
+ return -ENOTCONN;
+ }
+ return pcmk_ok;
+}
+
+void
+attrd_peer_clear_failure(pcmk__request_t *request)
+{
+ xmlNode *xml = request->xml;
+ const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
+ const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
+ const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION);
+ const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL);
+ guint interval_ms = crm_parse_interval_spec(interval_spec);
+ char *attr = NULL;
+ GHashTableIter iter;
+ regex_t regex;
+
+ crm_node_t *peer = crm_get_peer(0, request->peer);
+
+ if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
+ crm_info("Ignoring invalid request to clear failures for %s",
+ pcmk__s(rsc, "all resources"));
+ return;
+ }
+
+ crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
+
+ /* Make sure value is not set, so we delete */
+ if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) {
+ crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL);
+ }
+
+ g_hash_table_iter_init(&iter, attributes);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
+ if (regexec(&regex, attr, 0, NULL, 0) == 0) {
+ crm_trace("Matched %s when clearing %s",
+ attr, pcmk__s(rsc, "all resources"));
+ crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
+ attrd_peer_update(peer, xml, host, false);
+ }
+ }
+ regfree(&regex);
+}
+
+/*!
+ * \internal
+ * \brief Load attributes from a peer sync response
+ *
+ * \param[in] peer Peer that sent the sync response
+ * \param[in] peer_won Whether peer is the attribute writer
+ * \param[in,out] xml Request XML
+ */
+void
+attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml)
+{
+ crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
+ peer->uname);
+
+ if (peer_won) {
+ /* Initialize the "seen" flag for all attributes to cleared, so we can
+ * detect attributes that local node has but the writer doesn't.
+ */
+ attrd_clear_value_seen();
+ }
+
+ // Process each attribute update in the sync response
+ for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
+ child = pcmk__xml_next(child)) {
+ attrd_peer_update(peer, child,
+ crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
+ true);
+ }
+
+ if (peer_won) {
+ /* If any attributes are still not marked as seen, the writer doesn't
+ * know about them, so send all peers an update with them.
+ */
+ broadcast_unseen_local_values();
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove all attributes and optionally peer cache entries for a node
+ *
+ * \param[in] host Name of node to purge
+ * \param[in] uncache If true, remove node from peer caches
+ * \param[in] source Who requested removal (only used for logging)
+ */
+void
+attrd_peer_remove(const char *host, bool uncache, const char *source)
+{
+ attribute_t *a = NULL;
+ GHashTableIter aIter;
+
+ CRM_CHECK(host != NULL, return);
+ crm_notice("Removing all %s attributes for peer %s", host, source);
+
+ g_hash_table_iter_init(&aIter, attributes);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
+ if(g_hash_table_remove(a->values, host)) {
+ crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
+ }
+ }
+
+ if (uncache) {
+ crm_remote_peer_cache_remove(host);
+ reap_crm_member(0, host);
+ }
+}
+
+void
+attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
+{
+ GHashTableIter aIter;
+ GHashTableIter vIter;
+
+ attribute_t *a = NULL;
+ attribute_value_t *v = NULL;
+ xmlNode *sync = create_xml_node(NULL, __func__);
+
+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+
+ g_hash_table_iter_init(&aIter, attributes);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
+ g_hash_table_iter_init(&vIter, a->values);
+ while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
+ crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
+ attrd_add_value_xml(sync, a, v, false);
+ }
+ }
+
+ crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
+ attrd_send_message(peer, sync, false);
+ free_xml(sync);
+}
+
+void
+attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
+ bool filter)
+{
+ bool handle_sync_point = false;
+
+ if (xml_has_children(xml)) {
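+        /* A multi-update message carries one child element per attribute (for
+         * example, the regex expansion in attrd_ipc.c creates one child per
+         * matched attribute)
+         */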
+ for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL;
+ child = crm_next_same_xml(child)) {
+ attrd_copy_xml_attributes(xml, child);
+ attrd_peer_update_one(peer, child, filter);
+
+ if (attrd_request_has_sync_point(child)) {
+ handle_sync_point = true;
+ }
+ }
+
+ } else {
+ attrd_peer_update_one(peer, xml, filter);
+
+ if (attrd_request_has_sync_point(xml)) {
+ handle_sync_point = true;
+ }
+ }
+
+ /* If the update XML specified that the client wanted to wait for a sync
+ * point, process that now.
+ */
+ if (handle_sync_point) {
+ crm_trace("Hit local sync point for attribute update");
+ attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
+ }
+}
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
new file mode 100644
index 0000000..3b6b55a
--- /dev/null
+++ b/daemons/attrd/attrd_elections.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2013-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster.h>
+#include <crm/cluster/election_internal.h>
+
+#include "pacemaker-attrd.h"
+
+static char *peer_writer = NULL;
+static election_t *writer = NULL;
+
+static gboolean
+attrd_election_cb(gpointer user_data)
+{
+ attrd_declare_winner();
+
+ /* Update the peers after an election */
+ attrd_peer_sync(NULL, NULL);
+
+ /* Update the CIB after an election */
+ attrd_write_attributes(true, false);
+ return FALSE;
+}
+
+void
+attrd_election_init(void)
+{
+ writer = election_init(T_ATTRD, attrd_cluster->uname, 120000,
+ attrd_election_cb);
+}
+
+void
+attrd_election_fini(void)
+{
+ election_fini(writer);
+}
+
+void
+attrd_start_election_if_needed(void)
+{
+ if ((peer_writer == NULL)
+ && (election_state(writer) != election_in_progress)
+ && !attrd_shutting_down()) {
+
+ crm_info("Starting an election to determine the writer");
+ election_vote(writer);
+ }
+}
+
+bool
+attrd_election_won(void)
+{
+ return (election_state(writer) == election_won);
+}
+
+void
+attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml)
+{
+ enum election_result rc = 0;
+ enum election_result previous = election_state(writer);
+
+ crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname);
+
+ // Don't become writer if we're shutting down
+ rc = election_count_vote(writer, xml, !attrd_shutting_down());
+
+ switch(rc) {
+ case election_start:
+ crm_debug("Unsetting writer (was %s) and starting new election",
+ peer_writer? peer_writer : "unset");
+ free(peer_writer);
+ peer_writer = NULL;
+ election_vote(writer);
+ break;
+
+ case election_lost:
+ /* The election API should really distinguish between "we just lost
+ * to this peer" and "we already lost previously, and we are
+ * discarding this vote for some reason", but it doesn't.
+ *
+ * In the first case, we want to tentatively set the peer writer to
+ * this peer, even though another peer may eventually win (which we
+ * will learn via attrd_check_for_new_writer()), so
+ * attrd_start_election_if_needed() doesn't start a new election.
+ *
+ * Approximate a test for that case as best as possible.
+ */
+ if ((peer_writer == NULL) || (previous != election_lost)) {
+ pcmk__str_update(&peer_writer, peer->uname);
+ crm_debug("Election lost, presuming %s is writer for now",
+ peer_writer);
+ }
+ break;
+
+ case election_in_progress:
+ election_check(writer);
+ break;
+
+ default:
+ crm_info("Ignoring election op from %s due to error", peer->uname);
+ break;
+ }
+}
+
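+/* Check a peer's message for its writer election state; if the peer claims to
+ * have won, record it as the writer (or start a new election if the local node
+ * also believes it won). Returns true if the peer claims to be the winner.
+ */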
+bool
+attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml)
+{
+ int peer_state = 0;
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state);
+ if (peer_state == election_won) {
+ if ((election_state(writer) == election_won)
+ && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
+ crm_notice("Detected another attribute writer (%s), starting new election",
+ peer->uname);
+ election_vote(writer);
+
+ } else if (!pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) {
+ crm_notice("Recorded new attribute writer: %s (was %s)",
+ peer->uname, (peer_writer? peer_writer : "unset"));
+ pcmk__str_update(&peer_writer, peer->uname);
+ }
+ }
+ return (peer_state == election_won);
+}
+
+void
+attrd_declare_winner(void)
+{
+ crm_notice("Recorded local node as attribute writer (was %s)",
+ (peer_writer? peer_writer : "unset"));
+ pcmk__str_update(&peer_writer, attrd_cluster->uname);
+}
+
+void
+attrd_remove_voter(const crm_node_t *peer)
+{
+ election_remove(writer, peer->uname);
+ if (peer_writer && pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) {
+ free(peer_writer);
+ peer_writer = NULL;
+ crm_notice("Lost attribute writer %s", peer->uname);
+
+ /* Clear any election dampening in effect. Otherwise, if the lost writer
+ * had just won, the election could fizzle out with no new writer.
+ */
+ election_clear_dampening(writer);
+
+ /* If the writer received attribute updates during its shutdown, it will
+ * not have written them to the CIB. Ensure we get a new writer so they
+ * are written out. This means that every node that sees the writer
+ * leave will start a new election, but that's better than losing
+ * attributes.
+ */
+ attrd_start_election_if_needed();
+
+ /* If an election is in progress, we need to call election_check(), in case
+ * this lost peer is the only one that hasn't voted, otherwise the election
+ * would be pending until it's timed out.
+ */
+ } else if (election_state(writer) == election_in_progress) {
+ crm_debug("Checking election status upon loss of voter %s", peer->uname);
+ election_check(writer);
+ }
+}
+
+void
+attrd_xml_add_writer(xmlNode *xml)
+{
+ crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(writer));
+}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
new file mode 100644
index 0000000..9d3dfff
--- /dev/null
+++ b/daemons/attrd/attrd_ipc.c
@@ -0,0 +1,628 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <crm/cluster.h>
+#include <crm/cluster/internal.h>
+#include <crm/msg_xml.h>
+#include <crm/common/acl_internal.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/logging.h>
+#include <crm/common/results.h>
+#include <crm/common/strings_internal.h>
+#include <crm/common/util.h>
+
+#include "pacemaker-attrd.h"
+
+static qb_ipcs_service_t *ipcs = NULL;
+
+/*!
+ * \internal
+ * \brief Build the XML reply to a client query
+ *
+ * \param[in] attr Name of requested attribute
+ * \param[in] host Name of requested host (or NULL for all hosts)
+ *
+ * \return New XML reply
+ * \note Caller is responsible for freeing the resulting XML
+ */
+static xmlNode *build_query_reply(const char *attr, const char *host)
+{
+ xmlNode *reply = create_xml_node(NULL, __func__);
+ attribute_t *a;
+
+ if (reply == NULL) {
+ return NULL;
+ }
+ crm_xml_add(reply, F_TYPE, T_ATTRD);
+ crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY);
+ crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
+
+ /* If desired attribute exists, add its value(s) to the reply */
+ a = g_hash_table_lookup(attributes, attr);
+ if (a) {
+ attribute_value_t *v;
+ xmlNode *host_value;
+
+ crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
+
+ /* Allow caller to use "localhost" to refer to local node */
+ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
+ host = attrd_cluster->uname;
+ crm_trace("Mapped localhost to %s", host);
+ }
+
+ /* If a specific node was requested, add its value */
+ if (host) {
+ v = g_hash_table_lookup(a->values, host);
+ host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
+ if (host_value == NULL) {
+ free_xml(reply);
+ return NULL;
+ }
+ pcmk__xe_add_node(host_value, host, 0);
+ crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
+ (v? v->current : NULL));
+
+ /* Otherwise, add all nodes' values */
+ } else {
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, a->values);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
+ host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
+ if (host_value == NULL) {
+ free_xml(reply);
+ return NULL;
+ }
+ pcmk__xe_add_node(host_value, v->nodename, 0);
+ crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
+ }
+ }
+ }
+ return reply;
+}
+
+xmlNode *
+attrd_client_clear_failure(pcmk__request_t *request)
+{
+ xmlNode *xml = request->xml;
+ const char *rsc, *op, *interval_spec;
+
+ if (minimum_protocol_version >= 2) {
+ /* Propagate to all peers (including ourselves).
+ * This ends up at attrd_peer_message().
+ */
+ attrd_send_message(NULL, xml, false);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ }
+
+ rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
+ op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION);
+ interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL);
+
+ /* Map this to an update */
+ crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
+
+ /* Add regular expression matching desired attributes */
+
+ if (rsc) {
+ char *pattern;
+
+ if (op == NULL) {
+ pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
+
+ } else {
+ guint interval_ms = crm_parse_interval_spec(interval_spec);
+
+ pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
+ rsc, op, interval_ms);
+ }
+
+ crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern);
+ free(pattern);
+
+ } else {
+ crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL);
+ }
+
+ /* Make sure attribute and value are not set, so we delete via regex */
+ if (crm_element_value(xml, PCMK__XA_ATTR_NAME)) {
+ crm_xml_replace(xml, PCMK__XA_ATTR_NAME, NULL);
+ }
+ if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) {
+ crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL);
+ }
+
+ return attrd_client_update(request);
+}
+
+xmlNode *
+attrd_client_peer_remove(pcmk__request_t *request)
+{
+ xmlNode *xml = request->xml;
+
+ // Host and ID are not used in combination, rather host has precedence
+ const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
+ char *host_alloc = NULL;
+
+ attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
+
+ if (host == NULL) {
+ int nodeid = 0;
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid);
+ if (nodeid > 0) {
+ crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL);
+
+ if (node && node->uname) {
+ // Use cached name if available
+ host = node->uname;
+ } else {
+ // Otherwise ask cluster layer
+ host_alloc = get_node_name(nodeid);
+ host = host_alloc;
+ }
+ pcmk__xe_add_node(xml, host, 0);
+ }
+ }
+
+ if (host) {
+ crm_info("Client %s is requesting all values for %s be removed",
+ pcmk__client_name(request->ipc_client), host);
+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
+ free(host_alloc);
+ } else {
+ crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
+ pcmk__client_name(request->ipc_client));
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+xmlNode *
+attrd_client_query(pcmk__request_t *request)
+{
+ xmlNode *query = request->xml;
+ xmlNode *reply = NULL;
+ const char *attr = NULL;
+
+ crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));
+
+ /* Request must specify attribute name to query */
+ attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
+ if (attr == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Ignoring malformed query from %s (no attribute name given)",
+ pcmk__client_name(request->ipc_client));
+ return NULL;
+ }
+
+ /* Build the XML reply */
+ reply = build_query_reply(attr, crm_element_value(query,
+ PCMK__XA_ATTR_NODE_NAME));
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Could not respond to query from %s: could not create XML reply",
+ pcmk__client_name(request->ipc_client));
+ return NULL;
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+
+ request->ipc_client->request_id = 0;
+ return reply;
+}
+
+xmlNode *
+attrd_client_refresh(pcmk__request_t *request)
+{
+ crm_info("Updating all attributes");
+
+ attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
+ attrd_write_attributes(true, true);
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+static void
+handle_missing_host(xmlNode *xml)
+{
+ const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
+
+ if (host == NULL) {
+ crm_trace("Inferring host");
+ pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid);
+ }
+}
+
+/* Convert a single IPC message with a regex into one with multiple children, one
+ * for each regex match.
+ */
+static int
+expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
+{
+ if (attr == NULL && regex) {
+ bool matched = false;
+ GHashTableIter aIter;
+ regex_t r_patt;
+
+ crm_debug("Setting %s to %s", regex, value);
+ if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
+ return EINVAL;
+ }
+
+ g_hash_table_iter_init(&aIter, attributes);
+ while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
+ int status = regexec(&r_patt, attr, 0, NULL, 0);
+
+ if (status == 0) {
+ xmlNode *child = create_xml_node(xml, XML_ATTR_OP);
+
+ crm_trace("Matched %s with %s", attr, regex);
+ matched = true;
+
+ /* Copy all the attributes from the parent over, but remove the
+ * regex and replace it with the name.
+ */
+ attrd_copy_xml_attributes(xml, child);
+ crm_xml_replace(child, PCMK__XA_ATTR_PATTERN, NULL);
+ crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
+ }
+ }
+
+ regfree(&r_patt);
+
+ /* Return a distinct code if we never matched anything. This should not
+ * be treated as an error: there was a regex, and it was valid, but it
+ * simply did not match anything, so the caller should skip any further
+ * regex-related processing.
+ */
+ if (!matched) {
+ return pcmk_rc_op_unsatisfied;
+ }
+
+ } else if (attr == NULL) {
+ return pcmk_rc_bad_nvpair;
+ }
+
+ return pcmk_rc_ok;
+}
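+
+/* A sketch of what expand_regexes() produces (element and attribute names
+ * abbreviated; the real names come from XML_ATTR_OP and the PCMK__XA_ATTR_*
+ * constants): a request such as
+ *
+ *   <update pattern="^fail-count-" ... />
+ *
+ * with known attributes "fail-count-rsc1" and "fail-count-rsc2" becomes
+ *
+ *   <update pattern="^fail-count-" ...>
+ *     <op name="fail-count-rsc1" ... />
+ *     <op name="fail-count-rsc2" ... />
+ *   </update>
+ *
+ * where each child carries a copy of the parent's attributes minus the regex.
+ */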
+
+static int
+handle_regexes(pcmk__request_t *request)
+{
+ xmlNode *xml = request->xml;
+ int rc = pcmk_rc_ok;
+
+ const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
+ const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
+ const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN);
+
+ rc = expand_regexes(xml, attr, value, regex);
+
+ if (rc == EINVAL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Bad regex '%s' for update from client %s", regex,
+ pcmk__client_name(request->ipc_client));
+
+ } else if (rc == pcmk_rc_bad_nvpair) {
+ crm_err("Update request did not specify attribute or regular expression");
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Client %s update request did not specify attribute or regular expression",
+ pcmk__client_name(request->ipc_client));
+ }
+
+ return rc;
+}
+
+static int
+handle_value_expansion(const char **value, xmlNode *xml, const char *op,
+ const char *attr)
+{
+ attribute_t *a = g_hash_table_lookup(attributes, attr);
+
+ if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
+ return EINVAL;
+ }
+
+ if (*value && attrd_value_needs_expansion(*value)) {
+ int int_value;
+ attribute_value_t *v = NULL;
+
+ if (a) {
+ const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
+ v = g_hash_table_lookup(a->values, host);
+ }
+
+ int_value = attrd_expand_value(*value, (v? v->current : NULL));
+
+ crm_info("Expanded %s=%s to %d", attr, *value, int_value);
+ crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);
+
+ /* Replacing the value frees the previous memory, so re-query it */
+ *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
+ }
+
+ return pcmk_rc_ok;
+}
+
+static void
+send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
+{
+ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
+ /* The client is waiting on the cluster-wide sync point. In this case,
+ * the response ACK is not sent until this attrd broadcasts the update
+ * and receives its own confirmation back from all peers.
+ */
+ attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
+ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
+
+ } else {
+ /* The client is either waiting on the local sync point or was not
+ * waiting on any sync point at all. For the local sync point, the
+ * response ACK is sent in attrd_peer_update. For clients not
+ * waiting on any sync point, the response ACK is sent in
+ * handle_update_request immediately before this function was called.
+ */
+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
+ }
+}
+
+static int
+send_child_update(xmlNode *child, void *data)
+{
+ pcmk__request_t *request = (pcmk__request_t *) data;
+
+ /* Calling pcmk__set_result is handled by one of these calls to
+ * attrd_client_update, so no need to do it again here.
+ */
+ request->xml = child;
+ attrd_client_update(request);
+ return pcmk_rc_ok;
+}
+
+xmlNode *
+attrd_client_update(pcmk__request_t *request)
+{
+ xmlNode *xml = request->xml;
+ const char *attr, *value, *regex;
+
+ /* If the message has children, that means it is a message from a newer
+ * client that supports sending multiple operations at a time. There are
+ * two ways we can handle that.
+ */
+ if (xml_has_children(xml)) {
+ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
+ /* First, if all peers support a certain protocol version, we can
+ * just broadcast the big message and they'll handle it. However,
+ * we also need to apply all the transformations in this function
+ * to the children since they don't happen anywhere else.
+ */
+ for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL;
+ child = crm_next_same_xml(child)) {
+ attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
+ value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
+
+ handle_missing_host(child);
+
+ if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
+ pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
+ "Attribute %s does not exist", attr);
+ return NULL;
+ }
+ }
+
+ send_update_msg_to_cluster(request, xml);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+ } else {
+ /* Save the original xml node pointer so it can be restored after iterating
+ * over all the children.
+ */
+ xmlNode *orig_xml = request->xml;
+
+ /* Second, if they do not support that protocol version, split it
+ * up into individual messages and call attrd_client_update on
+ * each one.
+ */
+ pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request);
+ request->xml = orig_xml;
+ }
+
+ return NULL;
+ }
+
+ attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
+ value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
+ regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN);
+
+ if (handle_regexes(request) != pcmk_rc_ok) {
+ /* Error handling was already dealt with in handle_regexes, so just return. */
+ return NULL;
+ } else if (regex) {
+ /* Recursively call attrd_client_update on the new message with regexes
+ * expanded. If supported by the attribute daemon, this means that all
+ * matches can also be handled atomically.
+ */
+ return attrd_client_update(request);
+ }
+
+ handle_missing_host(xml);
+
+ if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
+ pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
+ "Attribute %s does not exist", attr);
+ return NULL;
+ }
+
+ crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME),
+ value, (attrd_election_won()? " (writer)" : ""));
+
+ send_update_msg_to_cluster(request, xml);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Accept a new client IPC connection
+ *
+ * \param[in,out] c New connection
+ * \param[in] uid Client user id
+ * \param[in] gid Client group id
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static int32_t
+attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
+{
+ crm_trace("New client connection %p", c);
+ if (attrd_shutting_down()) {
+ crm_info("Ignoring new connection from pid %d during shutdown",
+ pcmk__client_pid(c));
+ return -EPERM;
+ }
+
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return pcmk_ok;
+}
+
+/*!
+ * \internal
+ * \brief Clean up a closed client IPC connection
+ *
+ * \param[in] c Connection that was closed
+ *
+ * \return FALSE (i.e. do not re-run this callback)
+ */
+static int32_t
+attrd_ipc_closed(qb_ipcs_connection_t *c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ crm_trace("Ignoring request to clean up unknown connection %p", c);
+ } else {
+ crm_trace("Cleaning up closed client connection %p", c);
+
+ /* Remove the client from the sync point waitlist if it's present. */
+ attrd_remove_client_from_waitlist(client);
+
+ /* And no longer wait for confirmations from any peers. */
+ attrd_do_not_wait_for_client(client);
+
+ pcmk__free_client(client);
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Destroy a client IPC connection
+ *
+ * \param[in,out] c Connection to destroy
+ *
+ * \note We handle a destroyed connection the same as a closed one,
+ * but we need a separate handler because the return type is different.
+ */
+static void
+attrd_ipc_destroy(qb_ipcs_connection_t *c)
+{
+ crm_trace("Destroying client connection %p", c);
+ attrd_ipc_closed(c);
+}
+
+static int32_t
+attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ pcmk__client_t *client = pcmk__find_client(c);
+ xmlNode *xml = NULL;
+
+ // Sanity-check, and parse XML from IPC data
+ CRM_CHECK((c != NULL) && (client != NULL), return 0);
+ if (data == NULL) {
+ crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
+ return 0;
+ }
+
+ xml = pcmk__client_data2xml(client, data, &id, &flags);
+
+ if (xml == NULL) {
+ crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
+ pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+
+ } else {
+ pcmk__request_t request = {
+ .ipc_client = client,
+ .ipc_id = id,
+ .ipc_flags = flags,
+ .peer = NULL,
+ .xml = xml,
+ .call_options = 0,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
+
+ CRM_ASSERT(client->user != NULL);
+ pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
+
+ request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK);
+ CRM_CHECK(request.op != NULL, return 0);
+
+ attrd_handle_request(&request);
+ pcmk__reset_request(&request);
+ }
+
+ free_xml(xml);
+ return 0;
+}
+
+static struct qb_ipcs_service_handlers ipc_callbacks = {
+ .connection_accept = attrd_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = attrd_ipc_dispatch,
+ .connection_closed = attrd_ipc_closed,
+ .connection_destroyed = attrd_ipc_destroy
+};
+
+void
+attrd_ipc_fini(void)
+{
+ if (ipcs != NULL) {
+ pcmk__drop_all_clients(ipcs);
+ qb_ipcs_destroy(ipcs);
+ ipcs = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Set up attrd IPC communication
+ */
+void
+attrd_init_ipc(void)
+{
+ pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
+}
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
new file mode 100644
index 0000000..184176a
--- /dev/null
+++ b/daemons/attrd/attrd_messages.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/common/messages_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-attrd.h"
+
+int minimum_protocol_version = -1;
+
+static GHashTable *attrd_handlers = NULL;
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+ crm_err("Unknown IPC request %s from %s %s",
+ request->op, pcmk__request_origin_type(request),
+ pcmk__request_origin(request));
+ pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Unknown request type '%s' (bug?)", request->op);
+ return NULL;
+}
+
+static xmlNode *
+handle_clear_failure_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ /* It is not currently possible to receive this as a peer command,
+ * but it will be if we one day enable propagating this operation.
+ */
+ attrd_peer_clear_failure(request);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ } else {
+ if (attrd_request_has_sync_point(request->xml)) {
+ /* If this client supplied a sync point it wants to wait for, add it to
+ * the wait list. Clients on this list will not receive an ACK until
+ * their sync point is hit, leaving each client stalled until it receives
+ * a response.
+ *
+ * All other clients will receive the expected response as normal.
+ */
+ attrd_add_client_to_waitlist(request);
+
+ } else {
+ /* If the client doesn't want to wait for a sync point, go ahead and send
+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate
+ * sync point is reached.
+ */
+ attrd_send_ack(request->ipc_client, request->ipc_id,
+ request->ipc_flags);
+ }
+
+ return attrd_client_clear_failure(request);
+ }
+}
+
+static xmlNode *
+handle_confirm_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ int callid;
+
+ crm_debug("Received confirmation from %s", request->peer);
+
+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) {
+ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Could not get callid from XML");
+ } else {
+ attrd_handle_confirmation(callid, request->peer);
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ } else {
+ return handle_unknown_request(request);
+ }
+}
+
+static xmlNode *
+handle_flush_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ /* Ignore. The flush command was removed in 2.0.0 but may be
+ * received from peers running older versions.
+ */
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ } else {
+ return handle_unknown_request(request);
+ }
+}
+
+static xmlNode *
+handle_query_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ return handle_unknown_request(request);
+ } else {
+ return attrd_client_query(request);
+ }
+}
+
+static xmlNode *
+handle_remove_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME);
+ attrd_peer_remove(host, true, request->peer);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ } else {
+ return attrd_client_peer_remove(request);
+ }
+}
+
+static xmlNode *
+handle_refresh_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ return handle_unknown_request(request);
+ } else {
+ return attrd_client_refresh(request);
+ }
+}
+
+static xmlNode *
+handle_sync_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ crm_node_t *peer = crm_get_peer(0, request->peer);
+
+ attrd_peer_sync(peer, request->xml);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ } else {
+ return handle_unknown_request(request);
+ }
+}
+
+static xmlNode *
+handle_sync_response_request(pcmk__request_t *request)
+{
+ if (request->ipc_client != NULL) {
+ return handle_unknown_request(request);
+ } else {
+ if (request->peer != NULL) {
+ crm_node_t *peer = crm_get_peer(0, request->peer);
+ bool peer_won = attrd_check_for_new_writer(peer, request->xml);
+
+ if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
+ attrd_peer_sync_response(peer, peer_won, request->xml);
+ }
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+ }
+}
+
+static xmlNode *
+handle_update_request(pcmk__request_t *request)
+{
+ if (request->peer != NULL) {
+ const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME);
+ crm_node_t *peer = crm_get_peer(0, request->peer);
+
+ attrd_peer_update(peer, request->xml, host, false);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+
+ } else {
+ if (attrd_request_has_sync_point(request->xml)) {
+ /* If this client supplied a sync point it wants to wait for, add it to
+ * the wait list. Clients on this list will not receive an ACK until
+ * their sync point is hit, leaving each client stalled until it receives
+ * a response.
+ *
+ * All other clients will receive the expected response as normal.
+ */
+ attrd_add_client_to_waitlist(request);
+
+ } else {
+ /* If the client doesn't want to wait for a sync point, go ahead and send
+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate
+ * sync point is reached.
+ *
+ * In the normal case, attrd_client_update can be called recursively,
+ * which makes it tricky to decide where to send the ACK. Doing it here
+ * ensures the client
+ * only ever receives one.
+ */
+ attrd_send_ack(request->ipc_client, request->ipc_id,
+ request->ipc_flags|crm_ipc_client_response);
+ }
+
+ return attrd_client_update(request);
+ }
+}
+
+static void
+attrd_register_handlers(void)
+{
+ pcmk__server_command_t handlers[] = {
+ { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request },
+ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request },
+ { PCMK__ATTRD_CMD_FLUSH, handle_flush_request },
+ { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request },
+ { PCMK__ATTRD_CMD_QUERY, handle_query_request },
+ { PCMK__ATTRD_CMD_REFRESH, handle_refresh_request },
+ { PCMK__ATTRD_CMD_SYNC, handle_sync_request },
+ { PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request },
+ { PCMK__ATTRD_CMD_UPDATE, handle_update_request },
+ { PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request },
+ { PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request },
+ { NULL, handle_unknown_request },
+ };
+
+ attrd_handlers = pcmk__register_handlers(handlers);
+}
+
+void
+attrd_unregister_handlers(void)
+{
+ if (attrd_handlers != NULL) {
+ g_hash_table_destroy(attrd_handlers);
+ attrd_handlers = NULL;
+ }
+}
+
+void
+attrd_handle_request(pcmk__request_t *request)
+{
+ xmlNode *reply = NULL;
+ char *log_msg = NULL;
+ const char *reason = NULL;
+
+ if (attrd_handlers == NULL) {
+ attrd_register_handlers();
+ }
+
+ reply = pcmk__process_request(request, attrd_handlers);
+
+ if (reply != NULL) {
+ crm_log_xml_trace(reply, "Reply");
+
+ if (request->ipc_client != NULL) {
+ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
+ request->ipc_flags);
+ } else {
+ crm_err("Not sending CPG reply to client");
+ }
+
+ free_xml(reply);
+ }
+
+ reason = request->result.exit_reason;
+ log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
+ request->op, pcmk__request_origin_type(request),
+ pcmk__request_origin(request),
+ pcmk_exec_status_str(request->result.execution_status),
+ (reason == NULL)? "" : " (",
+ pcmk__s(reason, ""),
+ (reason == NULL)? "" : ")");
+
+ if (!pcmk__result_ok(&request->result)) {
+ crm_warn("%s", log_msg);
+ } else {
+ crm_debug("%s", log_msg);
+ }
+
+ free(log_msg);
+ pcmk__reset_request(request);
+}
+
+/*!
+ * \internal
+ * \brief Broadcast private attribute for local node with protocol version
+ */
+void
+attrd_broadcast_protocol(void)
+{
+ xmlNode *attrd_op = create_xml_node(NULL, __func__);
+
+ crm_xml_add(attrd_op, F_TYPE, T_ATTRD);
+ crm_xml_add(attrd_op, F_ORIG, crm_system_name);
+ crm_xml_add(attrd_op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
+ crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
+ crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
+ crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
+ pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid);
+
+ crm_debug("Broadcasting attrd protocol version %s for node %s",
+ ATTRD_PROTOCOL_VERSION, attrd_cluster->uname);
+
+ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */
+
+ free_xml(attrd_op);
+}
+
+gboolean
+attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm)
+{
+ const char *op = crm_element_value(data, PCMK__XA_TASK);
+
+ crm_xml_add(data, F_TYPE, T_ATTRD);
+ crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
+
+ /* Request a confirmation from the destination peer node (which could
+ * be all if node is NULL) that the message has been received and
+ * acted upon.
+ */
+ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
+ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm);
+ }
+
+ attrd_xml_add_writer(data);
+ return send_cluster_message(node, crm_msg_attrd, data, TRUE);
+}
diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c
new file mode 100644
index 0000000..d59ddd5
--- /dev/null
+++ b/daemons/attrd/attrd_sync.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright 2022-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/attrd_internal.h>
+
+#include "pacemaker-attrd.h"
+
+/* A hash table storing clients that are waiting on a sync point to be reached.
+ * The key is waitlist_client, a plain int. The obvious key would be the
+ * IPC client's ID, but that is not unique here: a single client could be
+ * waiting on sync points for multiple attributes at the same time.
+ *
+ * It is not expected that this hash table will ever be especially large.
+ */
+static GHashTable *waitlist = NULL;
+static int waitlist_client = 0;
+
+struct waitlist_node {
+ /* What kind of sync point does this node describe? */
+ enum attrd_sync_point sync_point;
+
+ /* Information required to construct and send a reply to the client. */
+ char *client_id;
+ uint32_t ipc_id;
+ uint32_t flags;
+};
+
+/* A hash table storing information on in-progress IPC requests that are awaiting
+ * confirmations. These requests are currently being processed by peer attrds and
+ * we are waiting to receive confirmation messages from each peer indicating that
+ * processing is complete.
+ *
+ * Multiple requests could be waiting on confirmations at the same time.
+ *
+ * The key is the unique callid for the IPC request, and the value is a
+ * confirmation_action struct.
+ */
+static GHashTable *expected_confirmations = NULL;
+
+/*!
+ * \internal
+ * \brief A structure describing a single IPC request that is awaiting confirmations
+ */
+struct confirmation_action {
+ /*!
+ * \brief A list of peer attrds that we are waiting to receive confirmation
+ * messages from
+ *
+ * This list is dynamic - as confirmations arrive from peer attrds, they will
+ * be removed from this list. When the list is empty, all peers have processed
+ * the request and the associated confirmation action will be taken.
+ */
+ GList *respondents;
+
+ /*!
+ * \brief A timer that will be used to remove the client should it time out
+ * before receiving all confirmations
+ */
+ mainloop_timer_t *timer;
+
+ /*!
+ * \brief A function to run when all confirmations have been received
+ */
+ attrd_confirmation_action_fn fn;
+
+ /*!
+ * \brief Information required to construct and send a reply to the client
+ */
+ char *client_id;
+ uint32_t ipc_id;
+ uint32_t flags;
+
+ /*!
+ * \brief The XML request containing the callid associated with this action
+ */
+ void *xml;
+};
+
+static void
+next_key(void)
+{
+ do {
+ waitlist_client++;
+ if (waitlist_client < 0) {
+ waitlist_client = 1;
+ }
+ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client)));
+}
+
+static void
+free_waitlist_node(gpointer data)
+{
+ struct waitlist_node *wl = (struct waitlist_node *) data;
+
+ free(wl->client_id);
+ free(wl);
+}
+
+static const char *
+sync_point_str(enum attrd_sync_point sync_point)
+{
+ if (sync_point == attrd_sync_point_local) {
+ return PCMK__VALUE_LOCAL;
+ } else if (sync_point == attrd_sync_point_cluster) {
+ return PCMK__VALUE_CLUSTER;
+ } else {
+ return "unknown";
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a client to the attrd waitlist
+ *
+ * Typically, a client receives an ACK for its XML IPC request immediately. However,
+ * some clients want to wait until their request has been processed and taken effect.
+ * This is called a sync point. Any client placed on this waitlist will have its
+ * ACK message delayed until either its requested sync point is hit, or until it
+ * times out.
+ *
+ * The XML IPC request must specify the type of sync point it wants to wait for.
+ *
+ * \param[in,out] request The request describing the client to place on the waitlist.
+ */
+void
+attrd_add_client_to_waitlist(pcmk__request_t *request)
+{
+ const char *sync_point = attrd_request_sync_point(request->xml);
+ struct waitlist_node *wl = NULL;
+
+ if (sync_point == NULL) {
+ return;
+ }
+
+ if (waitlist == NULL) {
+ waitlist = pcmk__intkey_table(free_waitlist_node);
+ }
+
+ wl = calloc(1, sizeof(struct waitlist_node));
+
+ CRM_ASSERT(wl != NULL);
+
+ wl->client_id = strdup(request->ipc_client->id);
+
+ CRM_ASSERT(wl->client_id);
+
+ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) {
+ wl->sync_point = attrd_sync_point_local;
+ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) {
+ wl->sync_point = attrd_sync_point_cluster;
+ } else {
+ free_waitlist_node(wl);
+ return;
+ }
+
+ wl->ipc_id = request->ipc_id;
+ wl->flags = request->ipc_flags;
+
+ next_key();
+ pcmk__intkey_table_insert(waitlist, waitlist_client, wl);
+
+ crm_trace("Added client %s to waitlist for %s sync point",
+ wl->client_id, sync_point_str(wl->sync_point));
+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist));
+
+ /* And then add the key to the request XML so we can uniquely identify
+ * it when it comes time to issue the ACK.
+ */
+ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client);
+}
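+
+/* Sketch of the resulting flow: a request carrying
+ * PCMK__XA_ATTR_SYNC_POINT="local" or "cluster" is parked here without an
+ * ACK. Once the requested sync point is reached, attrd_ack_waitlist_clients()
+ * matches the XML_LRM_ATTR_CALLID added above against the waitlist and sends
+ * the delayed ACK.
+ */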
+
+/*!
+ * \internal
+ * \brief Free all memory associated with the waitlist
+ *
+ * This is most typically used when attrd shuts down.
+ */
+void
+attrd_free_waitlist(void)
+{
+ if (waitlist == NULL) {
+ return;
+ }
+
+ g_hash_table_destroy(waitlist);
+ waitlist = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Unconditionally remove a client from the waitlist, such as when
+ * the client disconnects
+ *
+ * \param[in] client The client to remove
+ */
+void
+attrd_remove_client_from_waitlist(pcmk__client_t *client)
+{
+ GHashTableIter iter;
+ gpointer value;
+
+ if (waitlist == NULL) {
+ return;
+ }
+
+ g_hash_table_iter_init(&iter, waitlist);
+
+ while (g_hash_table_iter_next(&iter, NULL, &value)) {
+ struct waitlist_node *wl = (struct waitlist_node *) value;
+
+ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) {
+ g_hash_table_iter_remove(&iter);
+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist));
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Send an IPC ACK message to all awaiting clients
+ *
+ * This function will search the waitlist for all clients that are currently awaiting
+ * an ACK indicating their attrd operation is complete. Only those clients with a
+ * matching sync point type and callid from their original XML IPC request will be
+ * ACKed. Once they have received an ACK, they will be removed from the waitlist.
+ *
+ * \param[in] sync_point What kind of sync point have we hit?
+ * \param[in] xml The original XML IPC request.
+ */
+void
+attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml)
+{
+ int callid;
+ gpointer value;
+
+ if (waitlist == NULL) {
+ return;
+ }
+
+ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) {
+ crm_warn("Could not get callid from request XML");
+ return;
+ }
+
+ value = pcmk__intkey_table_lookup(waitlist, callid);
+ if (value != NULL) {
+ struct waitlist_node *wl = (struct waitlist_node *) value;
+ pcmk__client_t *client = NULL;
+
+ if (wl->sync_point != sync_point) {
+ return;
+ }
+
+ crm_notice("Alerting client %s for reached %s sync point",
+ wl->client_id, sync_point_str(wl->sync_point));
+
+ client = pcmk__find_client_by_id(wl->client_id);
+ if (client == NULL) {
+ return;
+ }
+
+ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response);
+
+ /* And then remove the client so it doesn't get alerted again. */
+ pcmk__intkey_table_remove(waitlist, callid);
+
+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Action to take when a cluster sync point is hit for a
+ * PCMK__ATTRD_CMD_UPDATE* message.
+ *
+ * \param[in] xml The request that should be passed along to
+ * attrd_ack_waitlist_clients. This should be the original
+ * IPC request containing the callid for this update message.
+ */
+int
+attrd_cluster_sync_point_update(xmlNode *xml)
+{
+ crm_trace("Hit cluster sync point for attribute update");
+ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Return the sync point attribute for an IPC request
+ *
+ * This function will check both the top-level element of \p xml for a sync
+ * point attribute, as well as all of its \p op children, if any. The latter
+ * is useful for newer versions of attrd that can put multiple IPC requests
+ * into a single message.
+ *
+ * \param[in] xml An XML IPC request
+ *
+ * \note It is assumed that if one child element has a sync point attribute,
+ * all will have a sync point attribute and they will all be the same
+ * sync point. No other configuration is supported.
+ *
+ * \return The sync point attribute of \p xml, or NULL if none.
+ */
+const char *
+attrd_request_sync_point(xmlNode *xml)
+{
+ if (xml_has_children(xml)) {
+ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL);
+
+ if (child) {
+ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT);
+ } else {
+ return NULL;
+ }
+
+ } else {
+ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Does an IPC request contain any sync point attribute?
+ *
+ * \param[in] xml An XML IPC request
+ *
+ * \return true if there's a sync point attribute, false otherwise
+ */
+bool
+attrd_request_has_sync_point(xmlNode *xml)
+{
+ return attrd_request_sync_point(xml) != NULL;
+}
+
+static void
+free_action(gpointer data)
+{
+ struct confirmation_action *action = (struct confirmation_action *) data;
+ g_list_free_full(action->respondents, free);
+ mainloop_timer_del(action->timer);
+ free_xml(action->xml);
+ free(action->client_id);
+ free(action);
+}
+
+/* Remove an IPC request from the expected_confirmations table if the peer attrds
+ * don't respond before the timeout is hit. We set the timeout to 15s. The exact
+ * number isn't critical - we just want to make sure that the table eventually gets
+ * cleared of things that didn't complete.
+ */
+static gboolean
+confirmation_timeout_cb(gpointer data)
+{
+ struct confirmation_action *action = (struct confirmation_action *) data;
+
+ GHashTableIter iter;
+ gpointer value;
+
+ if (expected_confirmations == NULL) {
+ return G_SOURCE_REMOVE;
+ }
+
+ g_hash_table_iter_init(&iter, expected_confirmations);
+
+ while (g_hash_table_iter_next(&iter, NULL, &value)) {
+ if (value == action) {
+ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id);
+ if (client == NULL) {
+ return G_SOURCE_REMOVE;
+ }
+
+ crm_trace("Timed out waiting for confirmations for client %s", client->id);
+ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response,
+ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT);
+
+ g_hash_table_iter_remove(&iter);
+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations));
+ break;
+ }
+ }
+
+ return G_SOURCE_REMOVE;
+}
+
+/*!
+ * \internal
+ * \brief Stop expecting confirmations from a disconnected peer
+ *
+ * When a peer disconnects from the cluster, no longer wait for its
+ * confirmation for any IPC action. If this peer is the last one being
+ * waited on, this will trigger the confirmation action.
+ *
+ * \param[in] host The disconnecting peer attrd's uname
+ */
+void
+attrd_do_not_expect_from_peer(const char *host)
+{
+ GList *keys = NULL;
+
+ if (expected_confirmations == NULL) {
+ return;
+ }
+
+ keys = g_hash_table_get_keys(expected_confirmations);
+
+ crm_trace("Removing peer %s from expected confirmations", host);
+
+ for (GList *node = keys; node != NULL; node = node->next) {
+ int callid = *(int *) node->data;
+ attrd_handle_confirmation(callid, host);
+ }
+
+ g_list_free(keys);
+}
+
+/*!
+ * \internal
+ * \brief Stop waiting on confirmations for a disconnected client
+ *
+ * When an IPC client disconnects, no longer wait on confirmations for it.
+ * The peer attrds may still be processing the original IPC message and may
+ * still send us confirmations, but we will take no action on them.
+ *
+ * \param[in] client The disconnecting client
+ */
+void
+attrd_do_not_wait_for_client(pcmk__client_t *client)
+{
+ GHashTableIter iter;
+ gpointer value;
+
+ if (expected_confirmations == NULL) {
+ return;
+ }
+
+ g_hash_table_iter_init(&iter, expected_confirmations);
+
+ while (g_hash_table_iter_next(&iter, NULL, &value)) {
+ struct confirmation_action *action = (struct confirmation_action *) value;
+
+ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) {
+ crm_trace("Removing client %s from expected confirmations", client->id);
+ g_hash_table_iter_remove(&iter);
+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations));
+ break;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Register some action to be taken when IPC request confirmations are
+ * received
+ *
+ * When this function is called, a list of all peer attrds that support confirming
+ * requests is generated. As confirmations from these peer attrds are received,
+ * they are removed from this list. When the list is empty, the registered action
+ * will be called.
+ *
+ * \note This function should always be called before attrd_send_message is called
+ * to broadcast to the peers to ensure that we know what replies we are
+ * waiting on. Otherwise, it is possible the peer could finish and confirm
+ * before we know to expect it.
+ *
+ * \param[in] request The request that is awaiting confirmations
+ * \param[in] fn A function to be run after all confirmations are received
+ */
+void
+attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn)
+{
+ struct confirmation_action *action = NULL;
+ GHashTableIter iter;
+ gpointer host, ver;
+ GList *respondents = NULL;
+ int callid;
+
+ if (expected_confirmations == NULL) {
+ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action);
+ }
+
+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) {
+ crm_err("Could not get callid from xml");
+ return;
+ }
+
+ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) {
+ crm_err("Already waiting on confirmations for call id %d", callid);
+ return;
+ }
+
+ g_hash_table_iter_init(&iter, peer_protocol_vers);
+ while (g_hash_table_iter_next(&iter, &host, &ver)) {
+ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) {
+ char *s = strdup((char *) host);
+
+ CRM_ASSERT(s != NULL);
+ respondents = g_list_prepend(respondents, s);
+ }
+ }
+
+ action = calloc(1, sizeof(struct confirmation_action));
+ CRM_ASSERT(action != NULL);
+
+ action->respondents = respondents;
+ action->fn = fn;
+ action->xml = copy_xml(request->xml);
+
+ action->client_id = strdup(request->ipc_client->id);
+ CRM_ASSERT(action->client_id != NULL);
+
+ action->ipc_id = request->ipc_id;
+ action->flags = request->ipc_flags;
+
+ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action);
+ mainloop_timer_start(action->timer);
+
+ pcmk__intkey_table_insert(expected_confirmations, callid, action);
+ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents));
+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations));
+}
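+
+/* A minimal usage sketch, mirroring how the IPC code drives this (see
+ * send_update_msg_to_cluster() in attrd_ipc.c):
+ *
+ *   attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
+ *   attrd_send_message(NULL, request->xml, true); // broadcast, confirm=true
+ *
+ * Each arriving PCMK__ATTRD_CMD_CONFIRM then funnels into
+ * attrd_handle_confirmation(), which removes that peer from the respondents
+ * list and runs the registered function once the list empties.
+ */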
+
+void
+attrd_free_confirmations(void)
+{
+ if (expected_confirmations != NULL) {
+ g_hash_table_destroy(expected_confirmations);
+ expected_confirmations = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Process a confirmation message from a peer attrd
+ *
+ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is
+ * received from a peer attrd. If this is the last confirmation we are waiting
+ * on for a given operation, the registered action will be called.
+ *
+ * \param[in] callid The unique callid for the XML IPC request
+ * \param[in] host The confirming peer attrd's uname
+ */
+void
+attrd_handle_confirmation(int callid, const char *host)
+{
+ struct confirmation_action *action = NULL;
+ GList *node = NULL;
+
+ if (expected_confirmations == NULL) {
+ return;
+ }
+
+ action = pcmk__intkey_table_lookup(expected_confirmations, callid);
+ if (action == NULL) {
+ return;
+ }
+
+ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp);
+
+ if (node == NULL) {
+ return;
+ }
+
+ action->respondents = g_list_remove(action->respondents, node->data);
+ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents));
+
+ if (action->respondents == NULL) {
+ action->fn(action->xml);
+ pcmk__intkey_table_remove(expected_confirmations, callid);
+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations));
+ }
+}
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
new file mode 100644
index 0000000..7de8dd9
--- /dev/null
+++ b/daemons/attrd/attrd_utils.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <glib.h>
+#include <regex.h>
+#include <sys/types.h>
+
+#include <crm/crm.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-attrd.h"
+
+cib_t *the_cib = NULL;
+
+static bool requesting_shutdown = false;
+static bool shutting_down = false;
+static GMainLoop *mloop = NULL;
+
+/* A hash table storing information on the protocol version of each peer attrd.
+ * The key is the peer's uname, and the value is the protocol version number.
+ */
+GHashTable *peer_protocol_vers = NULL;
+
+/*!
+ * \internal
+ * \brief Set requesting_shutdown state
+ */
+void
+attrd_set_requesting_shutdown(void)
+{
+ requesting_shutdown = true;
+}
+
+/*!
+ * \internal
+ * \brief Clear requesting_shutdown state
+ */
+void
+attrd_clear_requesting_shutdown(void)
+{
+ requesting_shutdown = false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether we're currently requesting shutdown
+ *
+ * \return true if requesting shutdown, false otherwise
+ */
+bool
+attrd_requesting_shutdown(void)
+{
+ return requesting_shutdown;
+}
+
+/*!
+ * \internal
+ * \brief Check whether we're currently shutting down
+ *
+ * \return true if shutting down, false otherwise
+ */
+bool
+attrd_shutting_down(void)
+{
+ return shutting_down;
+}
+
+/*!
+ * \internal
+ * \brief Exit (using mainloop or not, as appropriate)
+ *
+ * \param[in] nsig Ignored
+ */
+void
+attrd_shutdown(int nsig)
+{
+ // Tell various functions not to do anything
+ shutting_down = true;
+
+ // Don't respond to signals while shutting down
+ mainloop_destroy_signal(SIGTERM);
+ mainloop_destroy_signal(SIGCHLD);
+ mainloop_destroy_signal(SIGPIPE);
+ mainloop_destroy_signal(SIGUSR1);
+ mainloop_destroy_signal(SIGUSR2);
+ mainloop_destroy_signal(SIGTRAP);
+
+ attrd_free_waitlist();
+ attrd_free_confirmations();
+
+ if (peer_protocol_vers != NULL) {
+ g_hash_table_destroy(peer_protocol_vers);
+ peer_protocol_vers = NULL;
+ }
+
+ if ((mloop == NULL) || !g_main_loop_is_running(mloop)) {
+ /* If there's no main loop active, just exit. This should be possible
+ * only if we get SIGTERM in brief windows at start-up and shutdown.
+ */
+ crm_exit(CRM_EX_OK);
+ } else {
+ g_main_loop_quit(mloop);
+ g_main_loop_unref(mloop);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a main loop for attrd
+ */
+void
+attrd_init_mainloop(void)
+{
+ mloop = g_main_loop_new(NULL, FALSE);
+}
+
+/*!
+ * \internal
+ * \brief Run attrd main loop
+ */
+void
+attrd_run_mainloop(void)
+{
+ g_main_loop_run(mloop);
+}
+
+void
+attrd_cib_disconnect(void)
+{
+ CRM_CHECK(the_cib != NULL, return);
+ the_cib->cmds->del_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb);
+ the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb);
+ cib__clean_up_connection(&the_cib);
+}
+
+void
+attrd_cib_replaced_cb(const char *event, xmlNode * msg)
+{
+ int change_section = cib_change_section_nodes | cib_change_section_status | cib_change_section_alerts;
+
+ if (attrd_requesting_shutdown() || attrd_shutting_down()) {
+ return;
+ }
+
+ crm_element_value_int(msg, F_CIB_CHANGE_SECTION, &change_section);
+
+ if (attrd_election_won()) {
+ if (change_section & (cib_change_section_nodes | cib_change_section_status)) {
+ crm_notice("Updating all attributes after %s event", event);
+ attrd_write_attributes(true, false);
+ }
+ }
+
+ if (change_section & cib_change_section_alerts) {
+ // Check for changes in alerts
+ mainloop_set_trigger(attrd_config_read);
+ }
+}
+
+/* strlen("value") */
+#define plus_plus_len (5)
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value should be expanded
+ *
+ * \param[in] value Attribute value to check
+ *
+ * \return true if value needs expansion, false otherwise
+ */
+bool
+attrd_value_needs_expansion(const char *value)
+{
+ return ((strlen(value) >= (plus_plus_len + 2))
+ && (value[plus_plus_len] == '+')
+ && ((value[plus_plus_len + 1] == '+')
+ || (value[plus_plus_len + 1] == '=')));
+}
+
+/*!
+ * \internal
+ * \brief Expand an increment expression into an integer
+ *
+ * \param[in] value Attribute increment expression to expand
+ * \param[in] old_value Previous value of attribute
+ *
+ * \return Expanded value
+ */
+int
+attrd_expand_value(const char *value, const char *old_value)
+{
+ int offset = 1;
+ int int_value = char2score(old_value);
+
+ if (value[plus_plus_len + 1] != '+') {
+ const char *offset_s = value + (plus_plus_len + 2);
+
+ offset = char2score(offset_s);
+ }
+ int_value += offset;
+
+ if (int_value > INFINITY) {
+ int_value = INFINITY;
+ }
+ return int_value;
+}
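+
+/* For example (illustrative values only):
+ *
+ *   attrd_expand_value("value++", "5")   => 6
+ *   attrd_expand_value("value+=2", "5")  => 7
+ *
+ * The old value and any explicit offset pass through char2score(), so
+ * non-numeric old values such as "INFINITY" are converted to scores first,
+ * and the result saturates at INFINITY.
+ */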
+
+/*!
+ * \internal
+ * \brief Create regular expression matching failure-related attributes
+ *
+ * \param[out] regex Where to store created regular expression
+ * \param[in] rsc Name of resource to clear (or NULL for all)
+ * \param[in] op Operation to clear if rsc is specified (or NULL for all)
+ * \param[in] interval_ms Interval of operation to clear if op is specified
+ *
+ * \return pcmk_ok on success, -EINVAL if arguments are invalid
+ *
+ * \note The caller is responsible for freeing the result with regfree().
+ */
+int
+attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
+ guint interval_ms)
+{
+ char *pattern = NULL;
+ int rc;
+
+ /* Create a pattern that matches desired attributes */
+
+ if (rsc == NULL) {
+ pattern = strdup(ATTRD_RE_CLEAR_ALL);
+ } else if (op == NULL) {
+ pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
+ } else {
+ pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms);
+ }
+
+ /* Compile pattern into regular expression */
+ crm_trace("Clearing attributes matching %s", pattern);
+ rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB);
+ free(pattern);
+
+ return (rc == 0)? pcmk_ok : -EINVAL;
+}
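+
+/* A minimal usage sketch (hypothetical resource and operation names):
+ * clearing failures of the 10-second-interval monitor on "rsc1":
+ *
+ *   regex_t regex;
+ *
+ *   if (attrd_failure_regex(&regex, "rsc1", "monitor", 10000) == pcmk_ok) {
+ *       if (regexec(&regex, "fail-count-rsc1#monitor_10000",
+ *                   0, NULL, 0) == 0) {
+ *           // this attribute matches and should be cleared
+ *       }
+ *       regfree(&regex);
+ *   }
+ */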
+
+void
+attrd_free_attribute_value(gpointer data)
+{
+ attribute_value_t *v = data;
+
+ free(v->nodename);
+ free(v->current);
+ free(v->requested);
+ free(v);
+}
+
+void
+attrd_free_attribute(gpointer data)
+{
+ attribute_t *a = data;
+ if(a) {
+ free(a->id);
+ free(a->set_id);
+ free(a->set_type);
+ free(a->uuid);
+ free(a->user);
+
+ mainloop_timer_del(a->timer);
+ g_hash_table_destroy(a->values);
+
+ free(a);
+ }
+}
+
+/*!
+ * \internal
+ * \brief When a peer node leaves the cluster, stop tracking its protocol version.
+ *
+ * \param[in] host The peer node's uname to be removed
+ */
+void
+attrd_remove_peer_protocol_ver(const char *host)
+{
+ if (peer_protocol_vers != NULL) {
+ g_hash_table_remove(peer_protocol_vers, host);
+ }
+}
+
+/*!
+ * \internal
+ * \brief When a peer node broadcasts a message with its protocol version, keep
+ * track of that information.
+ *
+ * We keep track of each peer's protocol version so we know which peers to
+ * expect confirmation messages from when handling cluster-wide sync points.
+ * We additionally keep track of the lowest protocol version supported by all
+ * peers so we know when we can send IPC messages containing more than one
+ * request.
+ *
+ * \param[in] host The peer node's uname to be tracked
+ * \param[in] value The peer node's protocol version
+ */
+void
+attrd_update_minimum_protocol_ver(const char *host, const char *value)
+{
+ int ver;
+
+ if (peer_protocol_vers == NULL) {
+ peer_protocol_vers = pcmk__strkey_table(free, NULL);
+ }
+
+ pcmk__scan_min_int(value, &ver, 0);
+
+ if (ver > 0) {
+ char *host_name = strdup(host);
+
+ /* Record the peer attrd's protocol version. */
+ CRM_ASSERT(host_name != NULL);
+ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver));
+
+ /* If the protocol version is a new minimum, record it as such. */
+ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) {
+ minimum_protocol_version = ver;
+ crm_trace("Set minimum attrd protocol version to %d",
+ minimum_protocol_version);
+ }
+ }
+}
+
+void
+attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest)
+{
+ /* Copy attributes from the wrapper parent node into the child node.
+ * We can't just use copy_in_properties because we want to skip any
+ * attributes that are already set on the child. For instance, if
+ * we were told to use a specific node, there will already be a node
+ * attribute on the child. Copying the parent's node attribute over
+ * could result in the wrong value.
+ */
+ for (xmlAttrPtr a = pcmk__xe_first_attr(src); a != NULL; a = a->next) {
+ const char *p_name = (const char *) a->name;
+ const char *p_value = ((a == NULL) || (a->children == NULL)) ? NULL :
+ (const char *) a->children->content;
+
+ if (crm_element_value(dest, p_name) == NULL) {
+ crm_xml_add(dest, p_name, p_value);
+ }
+ }
+}
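+
+/* Illustration (attribute names and values abbreviated): if src carries
+ * node_name="node1" and value="5" while dest already has node_name="node2",
+ * only value="5" is copied; dest keeps its existing node_name="node2".
+ */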
diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c
new file mode 100644
index 0000000..037825b
--- /dev/null
+++ b/daemons/attrd/pacemaker-attrd.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/iso8601.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/common/attrd_internal.h>
+#include "pacemaker-attrd.h"
+
+#define SUMMARY "daemon for managing Pacemaker node attributes"
+
+gboolean stand_alone = FALSE;
+gchar **log_files = NULL;
+
+static GOptionEntry entries[] = {
+ { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
+ "(Advanced use only) Run in stand-alone mode", NULL },
+
+ { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
+ &log_files, "Send logs to the additional named logfile", NULL },
+
+ { NULL }
+};
+
+static pcmk__output_t *out = NULL;
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+lrmd_t *the_lrmd = NULL;
+crm_cluster_t *attrd_cluster = NULL;
+crm_trigger_t *attrd_config_read = NULL;
+crm_exit_t attrd_exit_status = CRM_EX_OK;
+
+static void
+attrd_cib_destroy_cb(gpointer user_data)
+{
+ cib_t *conn = user_data;
+
+ conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */
+
+ if (attrd_shutting_down()) {
+ crm_info("Connection disconnection complete");
+
+ } else {
+ /* eventually this should trigger a reconnect, not a shutdown */
+ crm_crit("Lost connection to the CIB manager, shutting down");
+ attrd_exit_status = CRM_EX_DISCONNECT;
+ attrd_shutdown(0);
+ }
+
+ return;
+}
+
+static void
+attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG),
+ "Cleared transient attributes: %s "
+ CRM_XS " xpath=%s rc=%d",
+ pcmk_strerror(rc), (char *) user_data, rc);
+}
+
+#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS
+
+/*!
+ * \internal
+ * \brief Wipe all transient attributes for this node from the CIB
+ *
+ * Clear any previous transient node attributes from the CIB. This is
+ * normally done by the DC's controller when this node leaves the cluster, but
+ * this handles the case where the node restarted so quickly that the
+ * cluster layer didn't notice.
+ *
+ * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned),
+ * ideally we'd skip this and sync our attributes from the writer.
+ * However, currently we reject any values for us that the writer has, in
+ * attrd_peer_update().
+ */
+static void
+attrd_erase_attrs(void)
+{
+ int call_id;
+ char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname);
+
+ crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s",
+ xpath);
+
+ call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath);
+ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath,
+ "attrd_erase_cb", attrd_erase_cb,
+ free);
+}
+
+static int
+attrd_cib_connect(int max_retry)
+{
+ static int attempts = 0;
+
+ int rc = -ENOTCONN;
+
+ the_cib = cib_new();
+ if (the_cib == NULL) {
+ return -ENOTCONN;
+ }
+
+ do {
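+ // Linear backoff: wait one second longer before each successive retry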
+ if(attempts > 0) {
+ sleep(attempts);
+ }
+
+ attempts++;
+ crm_debug("Connection attempt %d to the CIB manager", attempts);
+ rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command);
+
+ } while(rc != pcmk_ok && attempts < max_retry);
+
+ if (rc != pcmk_ok) {
+ crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ goto cleanup;
+ }
+
+ crm_debug("Connected to the CIB manager after %d attempts", attempts);
+
+ rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
+ if (rc != pcmk_ok) {
+ crm_err("Could not set disconnection callback");
+ goto cleanup;
+ }
+
+ rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb);
+ if(rc != pcmk_ok) {
+ crm_err("Could not set CIB notification callback");
+ goto cleanup;
+ }
+
+ rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb);
+ if (rc != pcmk_ok) {
+ crm_err("Could not set CIB notification callback (update)");
+ goto cleanup;
+ }
+
+ return pcmk_ok;
+
+ cleanup:
+ cib__clean_up_connection(&the_cib);
+ return -ENOTCONN;
+}
+
+/*!
+ * \internal
+ * \brief Prepare the CIB after cluster is connected
+ */
+static void
+attrd_cib_init(void)
+{
+ // We have no attribute values in memory, so wipe the CIB to match
+ attrd_erase_attrs();
+
+ // Set a trigger for reading the CIB (for the alerts section)
+ attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
+
+ // Always read the CIB at start-up
+ mainloop_set_trigger(attrd_config_read);
+}
+
+static bool
+ipc_already_running(void)
+{
+ pcmk_ipc_api_t *old_instance = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd);
+ if (rc != pcmk_rc_ok) {
+ return false;
+ }
+
+ rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync);
+ if (rc != pcmk_rc_ok) {
+ pcmk_free_ipc_api(old_instance);
+ return false;
+ }
+
+ pcmk_disconnect_ipc(old_instance);
+ pcmk_free_ipc_api(old_instance);
+ return true;
+}
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+
+ GError *error = NULL;
+ bool initialized = false;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ attrd_init_mainloop();
+ crm_log_preinit(NULL, argc, argv);
+ mainloop_add_signal(SIGTERM, attrd_shutdown);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ attrd_exit_status = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if ((rc != pcmk_rc_ok) || (out == NULL)) {
+ attrd_exit_status = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ // Open additional log files
+ pcmk__add_logfiles(log_files, out);
+
+ crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ crm_notice("Starting Pacemaker node attribute manager%s",
+ stand_alone ? " in standalone mode" : "");
+
+ if (ipc_already_running()) {
+ const char *msg = "pacemaker-attrd is already active, aborting startup";
+
+ attrd_exit_status = CRM_EX_OK;
+ g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg);
+ crm_err("%s", msg);
+ goto done;
+ }
+
+ initialized = true;
+
+ attributes = pcmk__strkey_table(NULL, attrd_free_attribute);
+
+ /* Connect to the CIB before connecting to the cluster or listening for IPC.
+ * This allows us to assume the CIB is connected whenever we process a
+ * cluster or IPC message (which also avoids start-up race conditions).
+ */
+ if (!stand_alone) {
+ if (attrd_cib_connect(30) != pcmk_ok) {
+ attrd_exit_status = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
+ "Could not connect to the CIB");
+ goto done;
+ }
+ crm_info("CIB connection active");
+ }
+
+ if (attrd_cluster_connect() != pcmk_ok) {
+ attrd_exit_status = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
+ "Could not connect to the cluster");
+ goto done;
+ }
+ crm_info("Cluster connection active");
+
+ // Initialization that requires the cluster to be connected
+ attrd_election_init();
+
+ if (!stand_alone) {
+ attrd_cib_init();
+ }
+
+ /* Set a private attribute for ourselves with the protocol version we
+ * support. This lets all nodes determine the minimum supported version
+ * across all nodes. It also ensures that the writer learns our node name,
+ * so it can send our attributes to the CIB.
+ */
+ attrd_broadcast_protocol();
+
+ attrd_init_ipc();
+ crm_notice("Pacemaker node attribute manager successfully started and accepting connections");
+ attrd_run_mainloop();
+
+ done:
+ if (initialized) {
+ crm_info("Shutting down attribute manager");
+
+ attrd_election_fini();
+ attrd_ipc_fini();
+ attrd_lrmd_disconnect();
+
+ if (!stand_alone) {
+ attrd_cib_disconnect();
+ }
+
+ attrd_free_waitlist();
+ pcmk_cluster_free(attrd_cluster);
+ g_hash_table_destroy(attributes);
+ }
+
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ g_strfreev(log_files);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, attrd_exit_status, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+ crm_exit(attrd_exit_status);
+}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
new file mode 100644
index 0000000..329fb5a
--- /dev/null
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PACEMAKER_ATTRD__H
+# define PACEMAKER_ATTRD__H
+
+#include <regex.h>
+#include <glib.h>
+#include <crm/crm.h>
+#include <crm/cluster.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/common/messages_internal.h>
+#include <crm/cib/internal.h>
+
+/*
+ * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used
+ * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned.
+ *
+ * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every
+ * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will
+ * also set a private attribute for itself with its version, so any attrd can
+ * determine the minimum version supported by all peers.
+ *
+ * Protocol Pacemaker Significant changes
+ * -------- --------- -------------------
+ * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only),
+ * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH,
+ * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC,
+ * PCMK__ATTRD_CMD_SYNC_RESPONSE
+ * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN),
+ * PCMK__ATTRD_CMD_QUERY
+ * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH,
+ * PCMK__ATTRD_CMD_UPDATE_DELAY
+ * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE
+ * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
+ * 4 2.1.5 Multiple attributes can be updated in a single IPC
+ * message
+ * 5 2.1.5 Peers can request confirmation of a sent message
+ */
+#define ATTRD_PROTOCOL_VERSION "5"
+
+#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4)
+#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5)
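Both capability macros take a peer's parsed protocol number. One plausible use, shown purely as an illustration (minimum_protocol_version is declared further down in this header; the function name is hypothetical):

    // Gate batching on the lowest protocol version any peer advertised
    extern int minimum_protocol_version;   // declared later in this header

    static void
    send_updates_sketch(void)
    {
        if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
            // every peer speaks protocol 4+: pack updates into one message
        } else {
            // at least one pre-4 peer: fall back to one update per message
        }
    }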
+
+#define attrd_send_ack(client, id, flags) \
+ pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE)
+
+void attrd_init_mainloop(void);
+void attrd_run_mainloop(void);
+
+void attrd_set_requesting_shutdown(void);
+void attrd_clear_requesting_shutdown(void);
+void attrd_free_waitlist(void);
+bool attrd_requesting_shutdown(void);
+bool attrd_shutting_down(void);
+void attrd_shutdown(int nsig);
+void attrd_init_ipc(void);
+void attrd_ipc_fini(void);
+
+void attrd_cib_disconnect(void);
+
+bool attrd_value_needs_expansion(const char *value);
+int attrd_expand_value(const char *value, const char *old_value);
+
+/* regular expression to clear failures of all resources */
+#define ATTRD_RE_CLEAR_ALL \
+ "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-"
+
+/* regular expression to clear failure of all operations for one resource
+ * (format takes resource name)
+ *
+ * @COMPAT attributes set < 1.1.17:
+ * also match older attributes that do not have the operation part
+ */
+#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$"
+
+/* regular expression to clear failure of one operation for one resource
+ * (format takes resource name, operation name, and interval)
+ *
+ * @COMPAT attributes set < 1.1.17:
+ * also match older attributes that do not have the operation part
+ */
+#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$"
+
+int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
+ guint interval_ms);
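To make the format strings concrete, here is a sketch of expanding ATTRD_RE_CLEAR_OP by hand (the resource "myrsc", operation "monitor", and interval 10000 are illustrative only; attrd_failure_regex() above is the real wrapper for this):

    #include <regex.h>

    char *pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
                                      "myrsc", "monitor", 10000U);
    regex_t regex;

    if (regcomp(&regex, pattern, REG_EXTENDED|REG_NOSUB) == 0) {
        // regexec(&regex, name, 0, NULL, 0) == 0 now matches both
        // fail-count-myrsc#monitor_10000 and last-failure-myrsc#monitor_10000
        regfree(&regex);
    }
    free(pattern);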
+
+extern cib_t *the_cib;
+
+/* Alerts */
+
+extern lrmd_t *the_lrmd;
+extern crm_trigger_t *attrd_config_read;
+
+void attrd_lrmd_disconnect(void);
+gboolean attrd_read_options(gpointer user_data);
+void attrd_cib_replaced_cb(const char *event, xmlNode * msg);
+void attrd_cib_updated_cb(const char *event, xmlNode *msg);
+int attrd_send_attribute_alert(const char *node, int nodeid,
+ const char *attr, const char *value);
+
+// Elections
+void attrd_election_init(void);
+void attrd_election_fini(void);
+void attrd_start_election_if_needed(void);
+bool attrd_election_won(void);
+void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml);
+bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml);
+void attrd_declare_winner(void);
+void attrd_remove_voter(const crm_node_t *peer);
+void attrd_xml_add_writer(xmlNode *xml);
+
+typedef struct attribute_s {
+ char *uuid; /* TODO: Remove if at all possible */
+ char *id;
+ char *set_id;
+ char *set_type;
+ GHashTable *values;
+ int update;
+ int timeout_ms;
+
+ /* TODO: refactor these three as a bitmask */
+ bool changed; /* whether attribute value has changed since last write */
+ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */
+ gboolean is_private; /* whether to keep this attribute out of the CIB */
+
+ mainloop_timer_t *timer;
+
+ char *user;
+
+ gboolean force_write; /* whether to write immediately, ignoring any dampening delay */
+
+} attribute_t;
+
+typedef struct attribute_value_s {
+ uint32_t nodeid;
+ gboolean is_remote;
+ char *nodename;
+ char *current;
+ char *requested;
+ gboolean seen;
+} attribute_value_t;
+
+extern crm_cluster_t *attrd_cluster;
+extern GHashTable *attributes;
+extern GHashTable *peer_protocol_vers;
+
+#define CIB_OP_TIMEOUT_S 120
+
+int attrd_cluster_connect(void);
+void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
+ bool filter);
+void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
+void attrd_peer_remove(const char *host, bool uncache, const char *source);
+void attrd_peer_clear_failure(pcmk__request_t *request);
+void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won,
+ xmlNode *xml);
+
+void attrd_broadcast_protocol(void);
+xmlNode *attrd_client_peer_remove(pcmk__request_t *request);
+xmlNode *attrd_client_clear_failure(pcmk__request_t *request);
+xmlNode *attrd_client_update(pcmk__request_t *request);
+xmlNode *attrd_client_refresh(pcmk__request_t *request);
+xmlNode *attrd_client_query(pcmk__request_t *request);
+gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm);
+
+xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
+ const attribute_value_t *v, bool force_write);
+void attrd_clear_value_seen(void);
+void attrd_free_attribute(gpointer data);
+void attrd_free_attribute_value(gpointer data);
+attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
+
+void attrd_write_attribute(attribute_t *a, bool ignore_delay);
+void attrd_write_attributes(bool all, bool ignore_delay);
+void attrd_write_or_elect_attribute(attribute_t *a);
+
+extern int minimum_protocol_version;
+void attrd_remove_peer_protocol_ver(const char *host);
+void attrd_update_minimum_protocol_ver(const char *host, const char *value);
+
+mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr);
+
+void attrd_unregister_handlers(void);
+void attrd_handle_request(pcmk__request_t *request);
+
+enum attrd_sync_point {
+ attrd_sync_point_local,
+ attrd_sync_point_cluster,
+};
+
+typedef int (*attrd_confirmation_action_fn)(xmlNode *);
+
+void attrd_add_client_to_waitlist(pcmk__request_t *request);
+void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml);
+int attrd_cluster_sync_point_update(xmlNode *xml);
+void attrd_do_not_expect_from_peer(const char *host);
+void attrd_do_not_wait_for_client(pcmk__client_t *client);
+void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn);
+void attrd_free_confirmations(void);
+void attrd_handle_confirmation(int callid, const char *host);
+void attrd_remove_client_from_waitlist(pcmk__client_t *client);
+const char *attrd_request_sync_point(xmlNode *xml);
+bool attrd_request_has_sync_point(xmlNode *xml);
+
+void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest);
+
+extern gboolean stand_alone;
+
+#endif /* PACEMAKER_ATTRD__H */
diff --git a/daemons/based/Makefile.am b/daemons/based/Makefile.am
new file mode 100644
index 0000000..053d93c
--- /dev/null
+++ b/daemons/based/Makefile.am
@@ -0,0 +1,47 @@
+#
+# Copyright 2004-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+
+EXTRA_DIST = cib.pam
+
+halibdir = $(CRM_DAEMON_DIR)
+
+COMMONLIBS = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/cib/libcib.la
+
+halib_PROGRAMS = pacemaker-based
+
+noinst_HEADERS = pacemaker-based.h
+
+pacemaker_based_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_based_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_based_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(COMMONLIBS) $(CLUSTERLIBS)
+
+pacemaker_based_SOURCES = pacemaker-based.c \
+ based_callbacks.c \
+ based_common.c \
+ based_io.c \
+ based_messages.c \
+ based_notify.c \
+ based_remote.c
+
+clean-generic:
+ rm -f *.log *.debug *.xml *~
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ $(MKDIR_P) -- $(DESTDIR)$(CRM_DAEMON_DIR)
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib && $(LN_S) pacemaker-based cib
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib
+endif
diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
new file mode 100644
index 0000000..3726caa
--- /dev/null
+++ b/daemons/based/based_callbacks.c
@@ -0,0 +1,1696 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <stdint.h> // uint32_t, uint64_t, UINT64_C()
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h> // PRIu64
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/common/xml.h>
+#include <crm/common/remote_internal.h>
+
+#include <pacemaker-based.h>
+
+#define EXIT_ESCALATION_MS 10000
+#define OUR_NODENAME (stand_alone? "localhost" : crm_cluster->uname)
+
+static unsigned long cib_local_bcast_num = 0;
+
+typedef struct cib_local_notify_s {
+ xmlNode *notify_src;
+ char *client_id;
+ gboolean from_peer;
+ gboolean sync_reply;
+} cib_local_notify_t;
+
+int next_client_id = 0;
+
+gboolean legacy_mode = FALSE;
+
+qb_ipcs_service_t *ipcs_ro = NULL;
+qb_ipcs_service_t *ipcs_rw = NULL;
+qb_ipcs_service_t *ipcs_shm = NULL;
+
+static void cib_process_request(xmlNode *request, gboolean privileged,
+ const pcmk__client_t *cib_client);
+
+static int cib_process_command(xmlNode *request, xmlNode **reply,
+ xmlNode **cib_diff, gboolean privileged);
+
+static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data,
+ size_t size, gboolean privileged);
+
+gboolean
+cib_legacy_mode(void)
+{
+ return legacy_mode;
+}
+
+static int32_t
+cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ if (cib_shutdown_flag) {
+ crm_info("Ignoring new IPC client [%d] during shutdown",
+ pcmk__client_pid(c));
+ return -EPERM;
+ }
+
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+static int32_t
+cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ crm_trace("%p message from %s", c, client->id);
+ return cib_common_callback(c, data, size, TRUE);
+}
+
+static int32_t
+cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ crm_trace("%p message from %s", c, client->id);
+ return cib_common_callback(c, data, size, FALSE);
+}
+
+/* The return value of this callback is not acted on by libqb */
+static int32_t
+cib_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+ crm_trace("Connection %p", c);
+ pcmk__free_client(client);
+ return 0;
+}
+
+static void
+cib_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p", c);
+ cib_ipc_closed(c);
+ if (cib_shutdown_flag) {
+ cib_shutdown(0);
+ }
+}
+
+struct qb_ipcs_service_handlers ipc_ro_callbacks = {
+ .connection_accept = cib_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = cib_ipc_dispatch_ro,
+ .connection_closed = cib_ipc_closed,
+ .connection_destroyed = cib_ipc_destroy
+};
+
+struct qb_ipcs_service_handlers ipc_rw_callbacks = {
+ .connection_accept = cib_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = cib_ipc_dispatch_rw,
+ .connection_closed = cib_ipc_closed,
+ .connection_destroyed = cib_ipc_destroy
+};
+
+void
+cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request,
+ pcmk__client_t *cib_client, gboolean privileged)
+{
+ const char *op = crm_element_value(op_request, F_CIB_OPERATION);
+
+ if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
+ if (flags & crm_ipc_client_response) {
+ xmlNode *ack = create_xml_node(NULL, __func__);
+
+ crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER);
+ crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id);
+ pcmk__ipc_send_xml(cib_client, id, ack, flags);
+ cib_client->request_id = 0;
+ free_xml(ack);
+ }
+ return;
+
+ } else if (pcmk__str_eq(op, T_CIB_NOTIFY, pcmk__str_none)) {
+ /* Update the notify filters for this client */
+ int on_off = 0;
+ crm_exit_t status = CRM_EX_OK;
+ uint64_t bit = UINT64_C(0);
+ const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE);
+
+ crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off);
+
+ crm_debug("Setting %s callbacks %s for client %s",
+ type, (on_off? "on" : "off"), pcmk__client_name(cib_client));
+
+ if (pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) {
+ bit = cib_notify_post;
+
+ } else if (pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) {
+ bit = cib_notify_pre;
+
+ } else if (pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) {
+ bit = cib_notify_confirm;
+
+ } else if (pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) {
+ bit = cib_notify_diff;
+
+ } else if (pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) {
+ bit = cib_notify_replace;
+
+ } else {
+ status = CRM_EX_INVALID_PARAM;
+ }
+
+ if (bit != 0) {
+ if (on_off) {
+ pcmk__set_client_flags(cib_client, bit);
+ } else {
+ pcmk__clear_client_flags(cib_client, bit);
+ }
+ }
+
+ pcmk__ipc_send_ack(cib_client, id, flags, "ack", NULL, status);
+ return;
+ }
+
+ cib_process_request(op_request, privileged, cib_client);
+}
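The T_CIB_NOTIFY branch above is the server half of notification subscriptions; the matching request is generated by libcib when a client registers a callback. A rough sketch of the client side, assuming the stock libcib entry points (cib_new, signon, add_notify_callback) and a hypothetical client name:

    #include <crm/cib.h>

    static void
    diff_cb(const char *event, xmlNode *msg)
    {
        crm_info("Got %s notification", event);
    }

    static int
    subscribe_sketch(void)
    {
        cib_t *cib = cib_new();
        int rc = cib->cmds->signon(cib, "example-client", cib_query);

        if (rc == pcmk_ok) {
            // triggers a T_CIB_NOTIFY request that sets cib_notify_diff
            rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, diff_cb);
        }
        return rc;
    }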
+
+int32_t
+cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ int call_options = 0;
+ pcmk__client_t *cib_client = pcmk__find_client(c);
+ xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags);
+
+ if (op_request) {
+ crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options);
+ }
+
+ if (op_request == NULL) {
+ crm_trace("Invalid message from %p", c);
+ pcmk__ipc_send_ack(cib_client, id, flags, "nack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+
+ } else if(cib_client == NULL) {
+ crm_trace("Invalid client %p", c);
+ return 0;
+ }
+
+ if (pcmk_is_set(call_options, cib_sync_call)) {
+ CRM_LOG_ASSERT(flags & crm_ipc_client_response);
+ CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */
+ cib_client->request_id = id; /* Reply only to the last one */
+ }
+
+ if (cib_client->name == NULL) {
+ const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME);
+
+ if (value == NULL) {
+ cib_client->name = pcmk__itoa(cib_client->pid);
+ } else {
+ cib_client->name = strdup(value);
+ if (crm_is_daemon_name(value)) {
+ pcmk__set_client_flags(cib_client, cib_is_daemon);
+ }
+ }
+ }
+
+ /* Allow cluster daemons more leeway before being evicted */
+ if (pcmk_is_set(cib_client->flags, cib_is_daemon)) {
+ const char *qmax = cib_config_lookup("cluster-ipc-limit");
+
+ if (pcmk__set_client_queue_max(cib_client, qmax)) {
+ crm_trace("IPC threshold for client %s[%u] is now %u",
+ pcmk__client_name(cib_client), cib_client->pid,
+ cib_client->queue_max);
+ }
+ }
+
+ crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id);
+ crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name);
+
+ CRM_LOG_ASSERT(cib_client->user != NULL);
+ pcmk__update_acl_user(op_request, F_CIB_USER, cib_client->user);
+
+ cib_common_callback_worker(id, flags, op_request, cib_client, privileged);
+ free_xml(op_request);
+
+ return 0;
+}
+
+static uint64_t ping_seq = 0;
+static char *ping_digest = NULL;
+static bool ping_modified_since = FALSE;
+
+static gboolean
+cib_digester_cb(gpointer data)
+{
+ if (based_is_primary) {
+ char buffer[32];
+ xmlNode *ping = create_xml_node(NULL, "ping");
+
+ ping_seq++;
+ free(ping_digest);
+ ping_digest = NULL;
+ ping_modified_since = FALSE;
+ snprintf(buffer, 32, "%" PRIu64, ping_seq);
+ crm_trace("Requesting peer digests (%s)", buffer);
+
+ crm_xml_add(ping, F_TYPE, "cib");
+ crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING);
+ crm_xml_add(ping, F_CIB_PING_ID, buffer);
+
+ crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ send_cluster_message(NULL, crm_msg_cib, ping, TRUE);
+
+ free_xml(ping);
+ }
+ return FALSE;
+}
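The reply side of this ping lives in based_messages.c rather than in this file; for orientation only, its core is roughly the following (ping and pong are hypothetical variable names here). The peer hashes its own CIB using the requester's feature-set version, so the digests are comparable in process_ping_reply() below.

    const char *version = crm_element_value(ping, XML_ATTR_CRM_VERSION);
    char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version);

    crm_xml_add(pong, XML_ATTR_DIGEST, digest);
    free(digest);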
+
+static void
+process_ping_reply(xmlNode *reply)
+{
+ uint64_t seq = 0;
+ const char *host = crm_element_value(reply, F_ORIG);
+
+ xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA);
+ const char *seq_s = crm_element_value(pong, F_CIB_PING_ID);
+ const char *digest = crm_element_value(pong, XML_ATTR_DIGEST);
+
+ if (seq_s == NULL) {
+ crm_debug("Ignoring ping reply with no " F_CIB_PING_ID);
+ return;
+
+ } else {
+ long long seq_ll;
+
+ if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) {
+ return;
+ }
+ seq = (uint64_t) seq_ll;
+ }
+
+ if(digest == NULL) {
+ crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host);
+
+ } else if(seq != ping_seq) {
+ crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host);
+
+ } else if(ping_modified_since) {
+ crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host);
+
+ } else {
+ const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION);
+
+ if(ping_digest == NULL) {
+ crm_trace("Calculating new digest");
+ ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version);
+ }
+
+ crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest);
+ if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) {
+ xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA);
+
+ crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p",
+ crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN),
+ crm_element_value(the_cib, XML_ATTR_GENERATION),
+ crm_element_value(the_cib, XML_ATTR_NUMUPDATES),
+ ping_digest, host,
+ remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_",
+ remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_",
+ remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_",
+ digest, remote_cib);
+
+ if(remote_cib && remote_cib->children) {
+ // Additional debug
+ xml_calculate_changes(the_cib, remote_cib);
+
+ pcmk__output_set_log_level(logger_out, LOG_INFO);
+ pcmk__xml_show_changes(logger_out, remote_cib);
+ crm_trace("End of differences");
+ }
+
+ free_xml(remote_cib);
+ sync_our_cib(reply, FALSE);
+ }
+ }
+}
+
+static void
+do_local_notify(xmlNode * notify_src, const char *client_id,
+ gboolean sync_reply, gboolean from_peer)
+{
+ int rid = 0;
+ int call_id = 0;
+ pcmk__client_t *client_obj = NULL;
+
+ CRM_ASSERT(notify_src && client_id);
+
+ crm_element_value_int(notify_src, F_CIB_CALLID, &call_id);
+
+ client_obj = pcmk__find_client_by_id(client_id);
+ if (client_obj == NULL) {
+ crm_debug("Could not send response %d: client %s not found",
+ call_id, client_id);
+ return;
+ }
+
+ if (sync_reply) {
+ if (client_obj->ipcs) {
+ CRM_LOG_ASSERT(client_obj->request_id);
+
+ rid = client_obj->request_id;
+ client_obj->request_id = 0;
+
+ crm_trace("Sending response %d to client %s%s",
+ rid, pcmk__client_name(client_obj),
+ (from_peer? " (originator of delegated request)" : ""));
+ } else {
+ crm_trace("Sending response (call %d) to client %s%s",
+ call_id, pcmk__client_name(client_obj),
+ (from_peer? " (originator of delegated request)" : ""));
+ }
+
+ } else {
+ crm_trace("Sending event %d to client %s%s",
+ call_id, pcmk__client_name(client_obj),
+ (from_peer? " (originator of delegated request)" : ""));
+ }
+
+ switch (PCMK__CLIENT_TYPE(client_obj)) {
+ case pcmk__client_ipc:
+ {
+ int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src,
+ (sync_reply? crm_ipc_flags_none
+ : crm_ipc_server_event));
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d",
+ (sync_reply? "Synchronous" : "Asynchronous"),
+ pcmk__client_name(client_obj), pcmk_rc_str(rc),
+ rc);
+ }
+ }
+ break;
+#ifdef HAVE_GNUTLS_GNUTLS_H
+ case pcmk__client_tls:
+#endif
+ case pcmk__client_tcp:
+ pcmk__remote_send_xml(client_obj->remote, notify_src);
+ break;
+ default:
+ crm_err("Unknown transport for client %s "
+ CRM_XS " flags=%#016" PRIx64,
+ pcmk__client_name(client_obj), client_obj->flags);
+ }
+}
+
+static void
+local_notify_destroy_callback(gpointer data)
+{
+ cib_local_notify_t *notify = data;
+
+ free_xml(notify->notify_src);
+ free(notify->client_id);
+ free(notify);
+}
+
+static void
+check_local_notify(int bcast_id)
+{
+ cib_local_notify_t *notify = NULL;
+
+ if (!local_notify_queue) {
+ return;
+ }
+
+ notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id);
+
+ if (notify) {
+ do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply,
+ notify->from_peer);
+ pcmk__intkey_table_remove(local_notify_queue, bcast_id);
+ }
+}
+
+static void
+queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
+ gboolean from_peer)
+{
+ cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t));
+
+ notify->notify_src = notify_src;
+ notify->client_id = strdup(client_id);
+ notify->sync_reply = sync_reply;
+ notify->from_peer = from_peer;
+
+ if (!local_notify_queue) {
+ local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback);
+ }
+ pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify);
+ // cppcheck doesn't know notify will get freed when hash table is destroyed
+ // cppcheck-suppress memleak
+}
+
+static void
+parse_local_options_v1(const pcmk__client_t *cib_client, int call_type,
+ int call_options, const char *host, const char *op,
+ gboolean *local_notify, gboolean *needs_reply,
+ gboolean *process, gboolean *needs_forward)
+{
+ if (cib_op_modifies(call_type)
+ && !(call_options & cib_inhibit_bcast)) {
+ /* we need to send an update anyway */
+ *needs_reply = TRUE;
+ } else {
+ *needs_reply = FALSE;
+ }
+
+ if (host == NULL && (call_options & cib_scope_local)) {
+ crm_trace("Processing locally scoped %s op from client %s",
+ op, pcmk__client_name(cib_client));
+ *local_notify = TRUE;
+
+ } else if ((host == NULL) && based_is_primary) {
+ crm_trace("Processing %s op locally from client %s as primary",
+ op, pcmk__client_name(cib_client));
+ *local_notify = TRUE;
+
+ } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
+ crm_trace("Processing locally addressed %s op from client %s",
+ op, pcmk__client_name(cib_client));
+ *local_notify = TRUE;
+
+ } else if (stand_alone) {
+ *needs_forward = FALSE;
+ *local_notify = TRUE;
+ *process = TRUE;
+
+ } else {
+ crm_trace("%s op from %s needs to be forwarded to client %s",
+ op, pcmk__client_name(cib_client),
+ pcmk__s(host, "the primary instance"));
+ *needs_forward = TRUE;
+ *process = FALSE;
+ }
+}
+
+static void
+parse_local_options_v2(const pcmk__client_t *cib_client, int call_type,
+ int call_options, const char *host, const char *op,
+ gboolean *local_notify, gboolean *needs_reply,
+ gboolean *process, gboolean *needs_forward)
+{
+ if (cib_op_modifies(call_type)) {
+ if (pcmk__str_any_of(op, PCMK__CIB_REQUEST_PRIMARY,
+ PCMK__CIB_REQUEST_SECONDARY, NULL)) {
+ /* Always handle these locally */
+ *process = TRUE;
+ *needs_reply = FALSE;
+ *local_notify = TRUE;
+ *needs_forward = FALSE;
+ return;
+
+ } else {
+ /* Redirect all other updates via CPG */
+ *needs_reply = TRUE;
+ *needs_forward = TRUE;
+ *process = FALSE;
+ crm_trace("%s op from %s needs to be forwarded to client %s",
+ op, pcmk__client_name(cib_client),
+ pcmk__s(host, "the primary instance"));
+ return;
+ }
+ }
+
+ *process = TRUE;
+ *needs_reply = FALSE;
+ *local_notify = TRUE;
+ *needs_forward = FALSE;
+
+ if (stand_alone) {
+ crm_trace("Processing %s op from client %s (stand-alone)",
+ op, pcmk__client_name(cib_client));
+
+ } else if (host == NULL) {
+ crm_trace("Processing unaddressed %s op from client %s",
+ op, pcmk__client_name(cib_client));
+
+ } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
+ crm_trace("Processing locally addressed %s op from client %s",
+ op, pcmk__client_name(cib_client));
+
+ } else {
+ crm_trace("%s op from %s needs to be forwarded to client %s",
+ op, pcmk__client_name(cib_client), host);
+ *needs_forward = TRUE;
+ *process = FALSE;
+ }
+}
+
+static void
+parse_local_options(const pcmk__client_t *cib_client, int call_type,
+ int call_options, const char *host, const char *op,
+ gboolean *local_notify, gboolean *needs_reply,
+ gboolean *process, gboolean *needs_forward)
+{
+ if(cib_legacy_mode()) {
+ parse_local_options_v1(cib_client, call_type, call_options, host,
+ op, local_notify, needs_reply, process, needs_forward);
+ } else {
+ parse_local_options_v2(cib_client, call_type, call_options, host,
+ op, local_notify, needs_reply, process, needs_forward);
+ }
+}
+
+static gboolean
+parse_peer_options_v1(int call_type, xmlNode * request,
+ gboolean * local_notify, gboolean * needs_reply, gboolean * process,
+ gboolean * needs_forward)
+{
+ const char *op = NULL;
+ const char *host = NULL;
+ const char *delegated = NULL;
+ const char *originator = crm_element_value(request, F_ORIG);
+ const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
+
+ gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei);
+
+ if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) {
+ *needs_reply = FALSE;
+ if (is_reply) {
+ *local_notify = TRUE;
+ crm_trace("Processing global/peer update from %s"
+ " that originated from us", originator);
+ } else {
+ crm_trace("Processing global/peer update from %s", originator);
+ }
+ return TRUE;
+ }
+
+ op = crm_element_value(request, F_CIB_OPERATION);
+ crm_trace("Processing %s request sent by %s", op, originator);
+ if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
+ /* Always process these */
+ *local_notify = FALSE;
+ if (reply_to == NULL || is_reply) {
+ *process = TRUE;
+ }
+ if (is_reply) {
+ *needs_reply = FALSE;
+ }
+ return *process;
+ }
+
+ if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
+ process_ping_reply(request);
+ return FALSE;
+ }
+
+ if (is_reply) {
+ crm_trace("Forward reply sent from %s to local clients", originator);
+ *process = FALSE;
+ *needs_reply = FALSE;
+ *local_notify = TRUE;
+ return TRUE;
+ }
+
+ host = crm_element_value(request, F_CIB_HOST);
+ if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
+ crm_trace("Processing %s request sent to us from %s", op, originator);
+ return TRUE;
+
+ } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
+ crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator);
+ *needs_reply = TRUE;
+ return TRUE;
+
+ } else if ((host == NULL) && based_is_primary) {
+ crm_trace("Processing %s request sent to primary instance from %s",
+ op, originator);
+ return TRUE;
+ }
+
+ delegated = crm_element_value(request, F_CIB_DELEGATED);
+ if (delegated != NULL) {
+ crm_trace("Ignoring message for primary instance");
+
+ } else if (host != NULL) {
+ /* this is for a specific instance and we're not it */
+ crm_trace("Ignoring msg for instance on %s", host);
+
+ } else if ((reply_to == NULL) && !based_is_primary) {
+ // This is for the primary instance, and we're not it
+ crm_trace("Ignoring reply for primary instance");
+
+ } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
+ if (reply_to != NULL) {
+ crm_debug("Processing %s from %s", op, originator);
+ *needs_reply = FALSE;
+
+ } else {
+ crm_debug("Processing %s reply from %s", op, originator);
+ }
+ return TRUE;
+
+ } else {
+ crm_err("Nothing for us to do?");
+ crm_log_xml_err(request, "Peer[inbound]");
+ }
+
+ return FALSE;
+}
+
+static gboolean
+parse_peer_options_v2(int call_type, xmlNode * request,
+ gboolean * local_notify, gboolean * needs_reply, gboolean * process,
+ gboolean * needs_forward)
+{
+ const char *host = NULL;
+ const char *delegated = crm_element_value(request, F_CIB_DELEGATED);
+ const char *op = crm_element_value(request, F_CIB_OPERATION);
+ const char *originator = crm_element_value(request, F_ORIG);
+ const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
+
+ gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei);
+
+ if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) {
+ /* sync_our_cib() sets F_CIB_ISREPLY */
+ if (reply_to) {
+ delegated = reply_to;
+ }
+ goto skip_is_reply;
+
+ } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL,
+ pcmk__str_none)) {
+ // Nothing to do
+
+ } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
+ process_ping_reply(request);
+ return FALSE;
+
+ } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) {
+ /* Only the DC (node with the oldest software) should process
+ * this operation if F_CIB_SCHEMA_MAX is unset
+ *
+ * If the DC is happy it will then send out another
+ * PCMK__CIB_REQUEST_UPGRADE which will tell all nodes to do the actual
+ * upgrade.
+ *
+ * Except this time F_CIB_SCHEMA_MAX will be set, which puts a limit
+ * on how far newer nodes will go.
+ */
+ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX);
+ const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC);
+
+ crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s",
+ op, (is_reply? " reply" : ""),
+ (based_is_primary? "primary" : "secondary"),
+ (max? max : "none"), (upgrade_rc? upgrade_rc : "none"));
+
+ if (upgrade_rc != NULL) {
+ // Our upgrade request was rejected by DC, notify clients of result
+ crm_xml_add(request, F_CIB_RC, upgrade_rc);
+
+ } else if ((max == NULL) && based_is_primary) {
+ /* We are the DC, check if this upgrade is allowed */
+ goto skip_is_reply;
+
+ } else if(max) {
+ /* Ok, go ahead and upgrade to 'max' */
+ goto skip_is_reply;
+
+ } else {
+ // Ignore broadcast client requests when we're not DC
+ return FALSE;
+ }
+
+ } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) {
+ crm_info("Detected legacy %s global update from %s", op, originator);
+ send_sync_request(NULL);
+ legacy_mode = TRUE;
+ return FALSE;
+
+ } else if (is_reply && cib_op_modifies(call_type)) {
+ crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator);
+ return FALSE;
+
+ } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
+ /* Legacy handling */
+ crm_debug("Legacy handling of %s message from %s", op, originator);
+ *local_notify = FALSE;
+ if (reply_to == NULL) {
+ *process = TRUE;
+ }
+ return *process;
+ }
+
+ if(is_reply) {
+ crm_trace("Handling %s reply sent from %s to local clients", op, originator);
+ *process = FALSE;
+ *needs_reply = FALSE;
+ *local_notify = TRUE;
+ return TRUE;
+ }
+
+ skip_is_reply:
+ *process = TRUE;
+ *needs_reply = FALSE;
+
+ *local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei);
+
+ host = crm_element_value(request, F_CIB_HOST);
+ if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
+ crm_trace("Processing %s request sent to us from %s", op, originator);
+ *needs_reply = TRUE;
+ return TRUE;
+
+ } else if (host != NULL) {
+ /* this is for a specific instance and we're not it */
+ crm_trace("Ignoring %s operation for instance on %s", op, host);
+ return FALSE;
+
+ } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
+ *needs_reply = TRUE;
+ }
+
+ crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op,
+ crm_element_value(request, F_CIB_CLIENTNAME),
+ crm_element_value(request, F_CIB_CALLID),
+ originator, (*local_notify)?"(notify)":"");
+ return TRUE;
+}
+
+static gboolean
+parse_peer_options(int call_type, xmlNode * request,
+ gboolean * local_notify, gboolean * needs_reply, gboolean * process,
+ gboolean * needs_forward)
+{
+ /* TODO: What happens when an update comes in after node A
+ * requests the CIB from node B, but before it gets the reply (and
+ * sends out the replace operation)?
+ */
+ if(cib_legacy_mode()) {
+ return parse_peer_options_v1(
+ call_type, request, local_notify, needs_reply, process, needs_forward);
+ } else {
+ return parse_peer_options_v2(
+ call_type, request, local_notify, needs_reply, process, needs_forward);
+ }
+}
+
+static void
+forward_request(xmlNode *request, int call_options)
+{
+ const char *op = crm_element_value(request, F_CIB_OPERATION);
+ const char *host = crm_element_value(request, F_CIB_HOST);
+
+ crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME);
+
+ if (host != NULL) {
+ crm_trace("Forwarding %s op to %s", op, host);
+ send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE);
+
+ } else {
+ crm_trace("Forwarding %s op to primary instance", op);
+ send_cluster_message(NULL, crm_msg_cib, request, FALSE);
+ }
+
+ /* Return the request to its original state */
+ xml_remove_prop(request, F_CIB_DELEGATED);
+
+ if (call_options & cib_discard_reply) {
+ crm_trace("Client not interested in reply");
+ }
+}
+
+static gboolean
+send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast)
+{
+ CRM_ASSERT(msg != NULL);
+
+ if (broadcast) {
+ /* this (successful) call modified the CIB _and_ the
+ * change needs to be broadcast...
+ * send via HA to other nodes
+ */
+ int diff_add_updates = 0;
+ int diff_add_epoch = 0;
+ int diff_add_admin_epoch = 0;
+
+ int diff_del_updates = 0;
+ int diff_del_epoch = 0;
+ int diff_del_admin_epoch = 0;
+
+ const char *digest = NULL;
+ int format = 1;
+
+ CRM_LOG_ASSERT(result_diff != NULL);
+ digest = crm_element_value(result_diff, XML_ATTR_DIGEST);
+ crm_element_value_int(result_diff, "format", &format);
+
+ cib_diff_version_details(result_diff,
+ &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
+ &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
+
+ crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s",
+ diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
+ diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest);
+
+ crm_xml_add(msg, F_CIB_ISREPLY, originator);
+ pcmk__xe_set_bool_attr(msg, F_CIB_GLOBAL_UPDATE, true);
+ crm_xml_add(msg, F_CIB_OPERATION, PCMK__CIB_REQUEST_APPLY_PATCH);
+ crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER);
+
+ if (format == 1) {
+ CRM_ASSERT(digest != NULL);
+ }
+
+ add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff);
+ crm_log_xml_explicit(msg, "copy");
+ return send_cluster_message(NULL, crm_msg_cib, msg, TRUE);
+
+ } else if (originator != NULL) {
+ /* send reply via HA to originating node */
+ crm_trace("Sending request result to %s only", originator);
+ crm_xml_add(msg, F_CIB_ISREPLY, originator);
+ return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE);
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Handle an IPC or CPG message containing a request
+ *
+ * \param[in,out] request Request XML
+ * \param[in] privileged Whether privileged commands may be run
+ * (see cib_server_ops[] definition)
+ * \param[in] cib_client IPC client that sent request (or NULL if CPG)
+ */
+static void
+cib_process_request(xmlNode *request, gboolean privileged,
+ const pcmk__client_t *cib_client)
+{
+ int call_type = 0;
+ int call_options = 0;
+
+ gboolean process = TRUE; // Whether to process request locally now
+ gboolean is_update = TRUE; // Whether request would modify CIB
+ gboolean needs_reply = TRUE; // Whether to build a reply
+ gboolean local_notify = FALSE; // Whether to notify (local) requester
+ gboolean needs_forward = FALSE; // Whether to forward request somewhere else
+
+ xmlNode *op_reply = NULL;
+ xmlNode *result_diff = NULL;
+
+ int rc = pcmk_ok;
+ const char *op = crm_element_value(request, F_CIB_OPERATION);
+ const char *originator = crm_element_value(request, F_ORIG);
+ const char *host = crm_element_value(request, F_CIB_HOST);
+ const char *target = NULL;
+ const char *call_id = crm_element_value(request, F_CIB_CALLID);
+ const char *client_id = crm_element_value(request, F_CIB_CLIENTID);
+ const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME);
+ const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
+
+ crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
+
+ if ((host != NULL) && (*host == '\0')) {
+ host = NULL;
+ }
+
+ if (host) {
+ target = host;
+
+ } else if (call_options & cib_scope_local) {
+ target = "local host";
+
+ } else {
+ target = "primary";
+ }
+
+ if (cib_client == NULL) {
+ crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)",
+ op, client_name, call_id, originator, target, reply_to);
+ } else {
+ crm_xml_add(request, F_ORIG, OUR_NODENAME);
+ crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target);
+ }
+
+ rc = cib_get_operation_id(op, &call_type);
+ if (rc != pcmk_ok) {
+ /* TODO: construct error reply? */
+ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc));
+ return;
+ }
+
+ if (cib_client != NULL) {
+ parse_local_options(cib_client, call_type, call_options, host, op,
+ &local_notify, &needs_reply, &process, &needs_forward);
+
+ } else if (parse_peer_options(call_type, request, &local_notify,
+ &needs_reply, &process, &needs_forward) == FALSE) {
+ return;
+ }
+
+ is_update = cib_op_modifies(call_type);
+
+ if (call_options & cib_discard_reply) {
+ /* If the request will modify the CIB, and we are in legacy mode, we
+ * need to build a reply so we can broadcast a diff, even if the
+ * requester doesn't want one.
+ */
+ needs_reply = is_update && cib_legacy_mode();
+ local_notify = FALSE;
+ }
+
+ if (needs_forward) {
+ const char *section = crm_element_value(request, F_CIB_SECTION);
+ int log_level = LOG_INFO;
+
+ if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) {
+ log_level = LOG_DEBUG;
+ }
+
+ do_crm_log(log_level,
+ "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)",
+ op,
+ section ? section : "'all'",
+ pcmk__s(host, (cib_legacy_mode() ? "primary" : "all")),
+ originator ? originator : "local",
+ client_name, call_id);
+
+ forward_request(request, call_options);
+ return;
+ }
+
+ if (cib_status != pcmk_ok) {
+ const char *call = crm_element_value(request, F_CIB_CALLID);
+
+ rc = cib_status;
+ crm_err("Operation ignored, cluster configuration is invalid."
+ " Please repair and restart: %s", pcmk_strerror(cib_status));
+
+ op_reply = create_xml_node(NULL, "cib-reply");
+ crm_xml_add(op_reply, F_TYPE, T_CIB);
+ crm_xml_add(op_reply, F_CIB_OPERATION, op);
+ crm_xml_add(op_reply, F_CIB_CALLID, call);
+ crm_xml_add(op_reply, F_CIB_CLIENTID, client_id);
+ crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options);
+ crm_xml_add_int(op_reply, F_CIB_RC, rc);
+
+ crm_trace("Attaching reply output");
+ add_message_xml(op_reply, F_CIB_CALLDATA, the_cib);
+
+ crm_log_xml_explicit(op_reply, "cib:reply");
+
+ } else if (process) {
+ time_t finished = 0;
+ time_t now = time(NULL);
+ int level = LOG_INFO;
+ const char *section = crm_element_value(request, F_CIB_SECTION);
+
+ rc = cib_process_command(request, &op_reply, &result_diff, privileged);
+
+ if (!is_update) {
+ level = LOG_TRACE;
+
+ } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) {
+ switch (rc) {
+ case pcmk_ok:
+ level = LOG_INFO;
+ break;
+ case -pcmk_err_old_data:
+ case -pcmk_err_diff_resync:
+ case -pcmk_err_diff_failed:
+ level = LOG_TRACE;
+ break;
+ default:
+ level = LOG_ERR;
+ }
+
+ } else if (rc != pcmk_ok) {
+ level = LOG_WARNING;
+ }
+
+ do_crm_log(level,
+ "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)",
+ op, section ? section : "'all'", pcmk_strerror(rc), rc,
+ originator ? originator : "local", client_name, call_id,
+ the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0",
+ the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0",
+ the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0");
+
+ finished = time(NULL);
+ if ((finished - now) > 3) {
+ crm_trace("%s operation took %lds to complete", op, (long)(finished - now));
+ crm_write_blackbox(0, NULL);
+ }
+
+ if (op_reply == NULL && (needs_reply || local_notify)) {
+ crm_err("Unexpected NULL reply to message");
+ crm_log_xml_err(request, "null reply");
+ needs_reply = FALSE;
+ local_notify = FALSE;
+ }
+ }
+
+ if (is_update && !cib_legacy_mode()) {
+ crm_trace("Completed pre-sync update from %s/%s/%s%s",
+ originator ? originator : "local", client_name, call_id,
+ local_notify?" with local notification":"");
+
+ } else if (!needs_reply || stand_alone) {
+ // This was a non-originating secondary update
+ crm_trace("Completed update as secondary");
+
+ } else if (cib_legacy_mode() &&
+ rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) {
+ gboolean broadcast = FALSE;
+
+ cib_local_bcast_num++;
+ crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num);
+ broadcast = send_peer_reply(request, result_diff, originator, TRUE);
+
+ if (broadcast && client_id && local_notify && op_reply) {
+
+ /* If we have been asked to sync the reply,
+ * and a bcast msg has gone out, we queue the local notify
+ * until we know the bcast message has been received */
+ local_notify = FALSE;
+ crm_trace("Queuing local %ssync notification for %s",
+ (call_options & cib_sync_call) ? "" : "a-", client_id);
+
+ queue_local_notify(op_reply, client_id,
+ pcmk_is_set(call_options, cib_sync_call),
+ (cib_client == NULL));
+ op_reply = NULL; /* the reply is queued, so don't free here */
+ }
+
+ } else if (call_options & cib_discard_reply) {
+ crm_trace("Caller isn't interested in reply");
+
+ } else if (cib_client == NULL) {
+ if (is_update == FALSE || result_diff == NULL) {
+ crm_trace("Request not broadcast: R/O call");
+
+ } else if (call_options & cib_inhibit_bcast) {
+ crm_trace("Request not broadcast: inhibited");
+
+ } else if (rc != pcmk_ok) {
+ crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc));
+
+ } else {
+ crm_trace("Directing reply to %s", originator);
+ }
+
+ send_peer_reply(op_reply, result_diff, originator, FALSE);
+ }
+
+ if (local_notify && client_id) {
+ crm_trace("Performing local %ssync notification for %s",
+ (pcmk_is_set(call_options, cib_sync_call)? "" : "a"),
+ client_id);
+ if (process == FALSE) {
+ do_local_notify(request, client_id,
+ pcmk_is_set(call_options, cib_sync_call),
+ (cib_client == NULL));
+ } else {
+ do_local_notify(op_reply, client_id,
+ pcmk_is_set(call_options, cib_sync_call),
+ (cib_client == NULL));
+ }
+ }
+
+ free_xml(op_reply);
+ free_xml(result_diff);
+
+ return;
+}
+
+static char *
+calculate_section_digest(const char *xpath, xmlNode * xml_obj)
+{
+ xmlNode *xml_section = NULL;
+
+ if (xml_obj == NULL) {
+ return NULL;
+ }
+
+ xml_section = get_xpath_object(xpath, xml_obj, LOG_TRACE);
+ if (xml_section == NULL) {
+ return NULL;
+ }
+ return calculate_xml_versioned_digest(xml_section, FALSE, TRUE, CRM_FEATURE_SET);
+}
+
+// v1 and v2 patch formats
+#define XPATH_CONFIG_CHANGE \
+ "//" XML_CIB_TAG_CRMCONFIG " | " \
+ "//" XML_DIFF_CHANGE \
+ "[contains(@" XML_DIFF_PATH ",'/" XML_CIB_TAG_CRMCONFIG "/')]"
+
+static bool
+contains_config_change(xmlNode *diff)
+{
+ bool changed = false;
+
+ if (diff) {
+ xmlXPathObject *xpathObj = xpath_search(diff, XPATH_CONFIG_CHANGE);
+
+ if (numXpathResults(xpathObj) > 0) {
+ changed = true;
+ }
+ freeXpathObject(xpathObj);
+ }
+ return changed;
+}
+
+static int
+cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged)
+{
+ xmlNode *input = NULL;
+ xmlNode *output = NULL;
+ xmlNode *result_cib = NULL;
+ xmlNode *current_cib = NULL;
+
+ int call_type = 0;
+ int call_options = 0;
+
+ const char *op = NULL;
+ const char *section = NULL;
+ const char *call_id = crm_element_value(request, F_CIB_CALLID);
+ const char *client_id = crm_element_value(request, F_CIB_CLIENTID);
+ const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME);
+ const char *origin = crm_element_value(request, F_ORIG);
+
+ int rc = pcmk_ok;
+ int rc2 = pcmk_ok;
+
+ gboolean send_r_notify = FALSE;
+ gboolean config_changed = FALSE;
+ gboolean manage_counters = TRUE;
+
+ static mainloop_timer_t *digest_timer = NULL;
+
+ char *current_nodes_digest = NULL;
+ char *current_alerts_digest = NULL;
+ char *current_status_digest = NULL;
+ uint32_t change_section = cib_change_section_nodes
+ |cib_change_section_alerts
+ |cib_change_section_status;
+
+ CRM_ASSERT(cib_status == pcmk_ok);
+
+ if(digest_timer == NULL) {
+ digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL);
+ }
+
+ *reply = NULL;
+ *cib_diff = NULL;
+ current_cib = the_cib;
+
+ /* Start processing the request... */
+ op = crm_element_value(request, F_CIB_OPERATION);
+ crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
+ rc = cib_get_operation_id(op, &call_type);
+
+ if (rc == pcmk_ok && privileged == FALSE) {
+ rc = cib_op_can_run(call_type, call_options, privileged);
+ }
+
+ rc2 = cib_op_prepare(call_type, request, &input, &section);
+ if (rc == pcmk_ok) {
+ rc = rc2;
+ }
+
+ if (rc != pcmk_ok) {
+ crm_trace("Call setup failed: %s", pcmk_strerror(rc));
+ goto done;
+
+ } else if (cib_op_modifies(call_type) == FALSE) {
+ rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
+ section, request, input, FALSE, &config_changed,
+ current_cib, &result_cib, NULL, &output);
+
+ CRM_CHECK(result_cib == NULL, free_xml(result_cib));
+ goto done;
+ }
+
+ /* Handle a valid write action */
+ if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) {
+ /* legacy code */
+ manage_counters = FALSE;
+ cib__set_call_options(call_options, "call", cib_force_diff);
+ crm_trace("Global update detected");
+
+ CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type);
+ crm_log_xml_err(request, "bad op"));
+ }
+
+ ping_modified_since = TRUE;
+ if (pcmk_is_set(call_options, cib_inhibit_bcast)) {
+ crm_trace("Skipping update: inhibit broadcast");
+ manage_counters = FALSE;
+ }
+
+ if (!pcmk_is_set(call_options, cib_dryrun)
+ && pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) {
+ // Copying large CIBs accounts for a huge percentage of our CIB usage
+ cib__set_call_options(call_options, "call", cib_zero_copy);
+ } else {
+ cib__clear_call_options(call_options, "call", cib_zero_copy);
+ }
+
+#define XPATH_CONFIG "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
+#define XPATH_NODES XPATH_CONFIG "/" XML_CIB_TAG_NODES
+#define XPATH_ALERTS XPATH_CONFIG "/" XML_CIB_TAG_ALERTS
+#define XPATH_STATUS "//" XML_TAG_CIB "/" XML_CIB_TAG_STATUS
+
+ // Calculate the hash value of the section before the change
+ if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) {
+ current_nodes_digest = calculate_section_digest(XPATH_NODES,
+ current_cib);
+ current_alerts_digest = calculate_section_digest(XPATH_ALERTS,
+ current_cib);
+ current_status_digest = calculate_section_digest(XPATH_STATUS,
+ current_cib);
+ crm_trace("current-digest %s:%s:%s", current_nodes_digest,
+ current_alerts_digest, current_status_digest);
+ }
+
+ // result_cib must not be modified after cib_perform_op() returns
+ rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE,
+ section, request, input, manage_counters,
+ &config_changed, current_cib, &result_cib, cib_diff,
+ &output);
+
+ if (!manage_counters) {
+ int format = 1;
+
+ /* Legacy code
+ * If the diff is NULL at this point, it's because nothing changed
+ */
+ if (*cib_diff != NULL) {
+ crm_element_value_int(*cib_diff, "format", &format);
+ }
+
+ if (format == 1) {
+ config_changed = cib__config_changed_v1(NULL, NULL, cib_diff);
+ }
+ }
+
+ /* Always write to disk for successful replace and upgrade ops. This also
+ * negates the need to detect ordering changes.
+ */
+ if ((rc == pcmk_ok)
+ && pcmk__str_any_of(op,
+ PCMK__CIB_REQUEST_REPLACE,
+ PCMK__CIB_REQUEST_UPGRADE,
+ NULL)) {
+ config_changed = TRUE;
+ }
+
+ if (rc == pcmk_ok && !pcmk_is_set(call_options, cib_dryrun)) {
+ crm_trace("Activating %s->%s%s%s",
+ crm_element_value(current_cib, XML_ATTR_NUMUPDATES),
+ crm_element_value(result_cib, XML_ATTR_NUMUPDATES),
+ (pcmk_is_set(call_options, cib_zero_copy)? " zero-copy" : ""),
+ (config_changed? " changed" : ""));
+ if (!pcmk_is_set(call_options, cib_zero_copy)) {
+ rc = activateCibXml(result_cib, config_changed, op);
+ crm_trace("Activated %s (%d)",
+ crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc);
+ }
+
+ if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) {
+ cib_read_config(config_hash, result_cib);
+ }
+
+ if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) {
+ char *result_nodes_digest = NULL;
+ char *result_alerts_digest = NULL;
+ char *result_status_digest = NULL;
+
+ /* Calculate the hash value of the changed section. */
+ result_nodes_digest = calculate_section_digest(XPATH_NODES,
+ result_cib);
+ result_alerts_digest = calculate_section_digest(XPATH_ALERTS,
+ result_cib);
+ result_status_digest = calculate_section_digest(XPATH_STATUS,
+ result_cib);
+ crm_trace("result-digest %s:%s:%s", result_nodes_digest,
+ result_alerts_digest, result_status_digest);
+
+ if (pcmk__str_eq(current_nodes_digest, result_nodes_digest,
+ pcmk__str_none)) {
+ change_section =
+ pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,
+ "CIB change section",
+ "change_section", change_section,
+ cib_change_section_nodes, "nodes");
+ }
+
+ if (pcmk__str_eq(current_alerts_digest, result_alerts_digest,
+ pcmk__str_none)) {
+ change_section =
+ pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,
+ "CIB change section",
+ "change_section", change_section,
+ cib_change_section_alerts, "alerts");
+ }
+
+ if (pcmk__str_eq(current_status_digest, result_status_digest,
+ pcmk__str_none)) {
+ change_section =
+ pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,
+ "CIB change section",
+ "change_section", change_section,
+ cib_change_section_status, "status");
+ }
+
+ if (change_section != cib_change_section_none) {
+ send_r_notify = TRUE;
+ }
+
+ free(result_nodes_digest);
+ free(result_alerts_digest);
+ free(result_status_digest);
+
+ } else if (pcmk__str_eq(PCMK__CIB_REQUEST_ERASE, op, pcmk__str_none)) {
+ send_r_notify = TRUE;
+ }
+
+ mainloop_timer_stop(digest_timer);
+ mainloop_timer_start(digest_timer);
+
+ } else if (rc == -pcmk_err_schema_validation) {
+ CRM_ASSERT(!pcmk_is_set(call_options, cib_zero_copy));
+
+ if (output != NULL) {
+ crm_log_xml_info(output, "cib:output");
+ free_xml(output);
+ }
+
+ output = result_cib;
+
+ } else {
+ crm_trace("Not activating %d %d %s", rc,
+ pcmk_is_set(call_options, cib_dryrun),
+ crm_element_value(result_cib, XML_ATTR_NUMUPDATES));
+ if (!pcmk_is_set(call_options, cib_zero_copy)) {
+ free_xml(result_cib);
+ }
+ }
+
+ if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) {
+ crm_trace("Sending notifications %d",
+ pcmk_is_set(call_options, cib_dryrun));
+ cib_diff_notify(op, rc, call_id, client_id, client_name, origin, input,
+ *cib_diff);
+ }
+
+ if (send_r_notify) {
+ cib_replace_notify(op, rc, call_id, client_id, client_name, origin,
+ the_cib, *cib_diff, change_section);
+ }
+
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+ logger_out->message(logger_out, "xml-patchset", *cib_diff);
+
+ done:
+ if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) {
+ const char *caller = crm_element_value(request, F_CIB_CLIENTID);
+
+ *reply = create_xml_node(NULL, "cib-reply");
+ crm_xml_add(*reply, F_TYPE, T_CIB);
+ crm_xml_add(*reply, F_CIB_OPERATION, op);
+ crm_xml_add(*reply, F_CIB_CALLID, call_id);
+ crm_xml_add(*reply, F_CIB_CLIENTID, caller);
+ crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options);
+ crm_xml_add_int(*reply, F_CIB_RC, rc);
+
+ if (output != NULL) {
+ crm_trace("Attaching reply output");
+ add_message_xml(*reply, F_CIB_CALLDATA, output);
+ }
+
+ crm_log_xml_explicit(*reply, "cib:reply");
+ }
+
+ crm_trace("cleanup");
+
+ if (cib_op_modifies(call_type) == FALSE && output != current_cib) {
+ free_xml(output);
+ output = NULL;
+ }
+
+ if (call_type >= 0) {
+ cib_op_cleanup(call_type, call_options, &input, &output);
+ }
+
+ free(current_nodes_digest);
+ free(current_alerts_digest);
+ free(current_status_digest);
+
+ crm_trace("done");
+ return rc;
+}
+
+void
+cib_peer_callback(xmlNode * msg, void *private_data)
+{
+ const char *reason = NULL;
+ const char *originator = crm_element_value(msg, F_ORIG);
+
+ if (cib_legacy_mode()
+ && pcmk__str_eq(originator, OUR_NODENAME,
+ pcmk__str_casei|pcmk__str_null_matches)) {
+ /* message is from ourselves */
+ int bcast_id = 0;
+
+ if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) {
+ check_local_notify(bcast_id);
+ }
+ return;
+
+ } else if (crm_peer_cache == NULL) {
+ reason = "membership not established";
+ goto bail;
+ }
+
+ if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) {
+ crm_xml_add(msg, F_CIB_CLIENTNAME, originator);
+ }
+
+ /* crm_log_xml_trace(msg, "Peer[inbound]"); */
+ cib_process_request(msg, TRUE, NULL);
+ return;
+
+ bail:
+ if (reason) {
+ const char *seq = crm_element_value(msg, F_SEQ);
+ const char *op = crm_element_value(msg, F_CIB_OPERATION);
+
+ crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason);
+ }
+}
+
+static gboolean
+cib_force_exit(gpointer data)
+{
+ crm_notice("Forcing exit!");
+ terminate_cib(__func__, CRM_EX_ERROR);
+ return FALSE;
+}
+
+static void
+disconnect_remote_client(gpointer key, gpointer value, gpointer user_data)
+{
+ pcmk__client_t *a_client = value;
+
+ crm_err("Can't disconnect client %s: Not implemented",
+ pcmk__client_name(a_client));
+}
+
+static void
+initiate_exit(void)
+{
+ int active = 0;
+ xmlNode *leaving = NULL;
+
+ active = crm_active_peers();
+ if (active < 2) {
+ terminate_cib(__func__, 0);
+ return;
+ }
+
+ crm_info("Sending disconnect notification to %d peers...", active);
+
+ leaving = create_xml_node(NULL, "exit-notification");
+ crm_xml_add(leaving, F_TYPE, "cib");
+ crm_xml_add(leaving, F_CIB_OPERATION, PCMK__CIB_REQUEST_SHUTDOWN);
+
+ send_cluster_message(NULL, crm_msg_cib, leaving, TRUE);
+ free_xml(leaving);
+
+ g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL);
+}
+
+void
+cib_shutdown(int nsig)
+{
+ struct qb_ipcs_stats srv_stats;
+
+ if (cib_shutdown_flag == FALSE) {
+ int disconnects = 0;
+ qb_ipcs_connection_t *c = NULL;
+
+ cib_shutdown_flag = TRUE;
+
+ c = qb_ipcs_connection_first_get(ipcs_rw);
+ while (c != NULL) {
+ qb_ipcs_connection_t *last = c;
+
+ c = qb_ipcs_connection_next_get(ipcs_rw, last);
+
+ crm_debug("Disconnecting r/w client %p...", last);
+ qb_ipcs_disconnect(last);
+ qb_ipcs_connection_unref(last);
+ disconnects++;
+ }
+
+ c = qb_ipcs_connection_first_get(ipcs_ro);
+ while (c != NULL) {
+ qb_ipcs_connection_t *last = c;
+
+ c = qb_ipcs_connection_next_get(ipcs_ro, last);
+
+ crm_debug("Disconnecting r/o client %p...", last);
+ qb_ipcs_disconnect(last);
+ qb_ipcs_connection_unref(last);
+ disconnects++;
+ }
+
+ c = qb_ipcs_connection_first_get(ipcs_shm);
+ while (c != NULL) {
+ qb_ipcs_connection_t *last = c;
+
+ c = qb_ipcs_connection_next_get(ipcs_shm, last);
+
+ crm_debug("Disconnecting non-blocking r/w client %p...", last);
+ qb_ipcs_disconnect(last);
+ qb_ipcs_connection_unref(last);
+ disconnects++;
+ }
+
+ disconnects += pcmk__ipc_client_count();
+
+ crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count());
+ pcmk__foreach_ipc_client(disconnect_remote_client, NULL);
+ crm_info("Disconnected %d clients", disconnects);
+ }
+
+ qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE);
+
+ if (pcmk__ipc_client_count() == 0) {
+ crm_info("All clients disconnected (%d)", srv_stats.active_connections);
+ initiate_exit();
+
+ } else {
+ crm_info("Waiting on %d clients to disconnect (%d)",
+ pcmk__ipc_client_count(), srv_stats.active_connections);
+ }
+}
+
+extern int remote_fd;
+extern int remote_tls_fd;
+
+/*!
+ * \internal
+ * \brief Close remote sockets, free the global CIB and quit
+ *
+ * \param[in] caller Name of calling function (for log message)
+ * \param[in] fast    If -1, skip the cluster disconnect; if positive, exit
+ *                    immediately, using that value as the exit status
+ */
+void
+terminate_cib(const char *caller, int fast)
+{
+ crm_info("%s: Exiting%s...", caller,
+ (fast > 0)? " fast" : mainloop ? " from mainloop" : "");
+
+ if (remote_fd > 0) {
+ close(remote_fd);
+ remote_fd = 0;
+ }
+ if (remote_tls_fd > 0) {
+ close(remote_tls_fd);
+ remote_tls_fd = 0;
+ }
+
+ uninitializeCib();
+
+ if (logger_out != NULL) {
+ logger_out->finish(logger_out, CRM_EX_OK, true, NULL);
+ pcmk__output_free(logger_out);
+ logger_out = NULL;
+ }
+
+ if (fast > 0) {
+ /* Quit fast on error */
+ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
+ crm_exit(fast);
+
+ } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
+ /* Quit via returning from the main loop. If fast == -1, we skip the
+ * disconnect here, and it will be done when the main loop returns
+ * (this allows the peer status callback to avoid messing with the
+ * peer caches).
+ */
+ if (fast == 0) {
+ crm_cluster_disconnect(crm_cluster);
+ }
+ g_main_loop_quit(mainloop);
+
+ } else {
+ /* Quit via clean exit. Even the peer status callback can disconnect
+ * here, because we're not returning control to the caller. */
+ crm_cluster_disconnect(crm_cluster);
+ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
+ crm_exit(CRM_EX_OK);
+ }
+}
diff --git a/daemons/based/based_common.c b/daemons/based/based_common.c
new file mode 100644
index 0000000..7e68cf0
--- /dev/null
+++ b/daemons/based/based_common.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2008-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/cluster.h>
+
+#include <crm/common/xml.h>
+
+#include <pacemaker-based.h>
+
+gboolean stand_alone = FALSE;
+
+extern int cib_perform_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff,
+ gboolean privileged);
+
+static xmlNode *
+cib_prepare_common(xmlNode * root, const char *section)
+{
+ xmlNode *data = NULL;
+
+ /* extract the CIB from the fragment */
+ if (root == NULL) {
+ return NULL;
+
+ } else if (pcmk__strcase_any_of(crm_element_name(root), XML_TAG_FRAGMENT,
+ F_CRM_DATA, F_CIB_CALLDATA, NULL)) {
+ data = first_named_child(root, XML_TAG_CIB);
+
+ } else {
+ data = root;
+ }
+
+ /* grab the section specified for the command */
+ if (section != NULL && data != NULL && pcmk__str_eq(crm_element_name(data), XML_TAG_CIB, pcmk__str_none)) {
+ data = pcmk_find_cib_element(data, section);
+ }
+
+ /* crm_log_xml_trace(root, "cib:input"); */
+ return data;
+}
+
+static int
+cib_prepare_none(xmlNode * request, xmlNode ** data, const char **section)
+{
+ *data = NULL;
+ *section = crm_element_value(request, F_CIB_SECTION);
+ return pcmk_ok;
+}
+
+static int
+cib_prepare_data(xmlNode * request, xmlNode ** data, const char **section)
+{
+ xmlNode *input_fragment = get_message_xml(request, F_CIB_CALLDATA);
+
+ *section = crm_element_value(request, F_CIB_SECTION);
+ *data = cib_prepare_common(input_fragment, *section);
+ /* crm_log_xml_debug(*data, "data"); */
+ return pcmk_ok;
+}
+
+static int
+cib_prepare_sync(xmlNode * request, xmlNode ** data, const char **section)
+{
+ *data = NULL;
+ *section = crm_element_value(request, F_CIB_SECTION);
+ return pcmk_ok;
+}
+
+static int
+cib_prepare_diff(xmlNode * request, xmlNode ** data, const char **section)
+{
+ xmlNode *input_fragment = NULL;
+
+ *data = NULL;
+ *section = NULL;
+
+ if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) {
+ input_fragment = get_message_xml(request, F_CIB_UPDATE_DIFF);
+ } else {
+ input_fragment = get_message_xml(request, F_CIB_CALLDATA);
+ }
+
+ CRM_CHECK(input_fragment != NULL, crm_log_xml_warn(request, "no input"));
+ *data = cib_prepare_common(input_fragment, NULL);
+ return pcmk_ok;
+}
+
+static int
+cib_cleanup_query(int options, xmlNode ** data, xmlNode ** output)
+{
+ CRM_LOG_ASSERT(*data == NULL);
+ if ((options & cib_no_children)
+ || pcmk__str_eq(crm_element_name(*output), "xpath-query", pcmk__str_casei)) {
+ free_xml(*output);
+ }
+ return pcmk_ok;
+}
+
+static int
+cib_cleanup_data(int options, xmlNode ** data, xmlNode ** output)
+{
+ free_xml(*output);
+ *data = NULL;
+ return pcmk_ok;
+}
+
+static int
+cib_cleanup_output(int options, xmlNode ** data, xmlNode ** output)
+{
+ free_xml(*output);
+ return pcmk_ok;
+}
+
+static int
+cib_cleanup_none(int options, xmlNode ** data, xmlNode ** output)
+{
+ CRM_LOG_ASSERT(*data == NULL);
+ CRM_LOG_ASSERT(*output == NULL);
+ return pcmk_ok;
+}
+
+static cib_operation_t cib_server_ops[] = {
+ // Booleans are modifies_cib, needs_privileges
+ {
+ NULL, FALSE, FALSE,
+ cib_prepare_none, cib_cleanup_none, cib_process_default
+ },
+ {
+ PCMK__CIB_REQUEST_QUERY, FALSE, FALSE,
+ cib_prepare_none, cib_cleanup_query, cib_process_query
+ },
+ {
+ PCMK__CIB_REQUEST_MODIFY, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_modify
+ },
+ {
+ PCMK__CIB_REQUEST_APPLY_PATCH, TRUE, TRUE,
+ cib_prepare_diff, cib_cleanup_data, cib_server_process_diff
+ },
+ {
+ PCMK__CIB_REQUEST_REPLACE, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_replace_svr
+ },
+ {
+ PCMK__CIB_REQUEST_CREATE, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_create
+ },
+ {
+ PCMK__CIB_REQUEST_DELETE, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_delete
+ },
+ {
+ PCMK__CIB_REQUEST_SYNC_TO_ALL, FALSE, TRUE,
+ cib_prepare_sync, cib_cleanup_none, cib_process_sync
+ },
+ {
+ PCMK__CIB_REQUEST_BUMP, TRUE, TRUE,
+ cib_prepare_none, cib_cleanup_output, cib_process_bump
+ },
+ {
+ PCMK__CIB_REQUEST_ERASE, TRUE, TRUE,
+ cib_prepare_none, cib_cleanup_output, cib_process_erase
+ },
+ {
+ PCMK__CIB_REQUEST_NOOP, FALSE, FALSE,
+ cib_prepare_none, cib_cleanup_none, cib_process_default
+ },
+ {
+ PCMK__CIB_REQUEST_ABS_DELETE, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_delete_absolute
+ },
+ {
+ PCMK__CIB_REQUEST_UPGRADE, TRUE, TRUE,
+ cib_prepare_none, cib_cleanup_output, cib_process_upgrade_server
+ },
+ {
+ PCMK__CIB_REQUEST_SECONDARY, FALSE, TRUE,
+ cib_prepare_none, cib_cleanup_none, cib_process_readwrite
+ },
+ {
+ PCMK__CIB_REQUEST_ALL_SECONDARY, FALSE, TRUE,
+ cib_prepare_none, cib_cleanup_none, cib_process_readwrite
+ },
+ {
+ PCMK__CIB_REQUEST_SYNC_TO_ONE, FALSE, TRUE,
+ cib_prepare_sync, cib_cleanup_none, cib_process_sync_one
+ },
+ {
+ PCMK__CIB_REQUEST_PRIMARY, TRUE, TRUE,
+ cib_prepare_data, cib_cleanup_data, cib_process_readwrite
+ },
+ {
+ PCMK__CIB_REQUEST_IS_PRIMARY, FALSE, TRUE,
+ cib_prepare_none, cib_cleanup_none, cib_process_readwrite
+ },
+ {
+ PCMK__CIB_REQUEST_SHUTDOWN, FALSE, TRUE,
+ cib_prepare_sync, cib_cleanup_none, cib_process_shutdown_req
+ },
+ {
+ CRM_OP_PING, FALSE, FALSE,
+ cib_prepare_none, cib_cleanup_output, cib_process_ping
+ },
+};
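+
+/* How the table above is consumed: cib_get_operation_id() below maps a
+ * request's operation name to a table index; callers then use
+ * cib_op_prepare(), the handler from cib_op_func(), and cib_op_cleanup()
+ * around the actual operation. A rough sketch:
+ *
+ *     int call_type = 0;
+ *
+ *     if ((cib_get_operation_id(op, &call_type) == pcmk_ok)
+ *         && (cib_op_can_run(call_type, call_options, privileged) == pcmk_ok)) {
+ *         cib_op_prepare(call_type, request, &input, &section);
+ *         // ... invoke the operation ...
+ *         cib_op_cleanup(call_type, call_options, &input, &output);
+ *     }
+ */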
+
+int
+cib_get_operation_id(const char *op, int *operation)
+{
+ static GHashTable *operation_hash = NULL;
+
+ if (operation_hash == NULL) {
+ int lpc = 0;
+ int max_msg_types = PCMK__NELEM(cib_server_ops);
+
+ operation_hash = pcmk__strkey_table(NULL, free);
+ for (lpc = 1; lpc < max_msg_types; lpc++) {
+ int *value = malloc(sizeof(int));
+
+ if(value) {
+ *value = lpc;
+ g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value);
+ }
+ }
+ }
+
+ if (op != NULL) {
+ int *value = g_hash_table_lookup(operation_hash, op);
+
+ if (value) {
+ *operation = *value;
+ return pcmk_ok;
+ }
+ }
+ crm_err("Operation %s is not valid", op);
+ *operation = -1;
+ return -EINVAL;
+}
+
+xmlNode *
+cib_msg_copy(xmlNode * msg, gboolean with_data)
+{
+ int lpc = 0;
+ const char *field = NULL;
+ const char *value = NULL;
+ xmlNode *value_struct = NULL;
+
+ static const char *field_list[] = {
+ F_XML_TAGNAME,
+ F_TYPE,
+ F_CIB_CLIENTID,
+ F_CIB_CALLOPTS,
+ F_CIB_CALLID,
+ F_CIB_OPERATION,
+ F_CIB_ISREPLY,
+ F_CIB_SECTION,
+ F_CIB_HOST,
+ F_CIB_RC,
+ F_CIB_DELEGATED,
+ F_CIB_OBJID,
+ F_CIB_OBJTYPE,
+ F_CIB_EXISTING,
+ F_CIB_SEENCOUNT,
+ F_CIB_TIMEOUT,
+ F_CIB_GLOBAL_UPDATE,
+ F_CIB_CLIENTNAME,
+ F_CIB_USER,
+ F_CIB_NOTIFY_TYPE,
+ F_CIB_NOTIFY_ACTIVATE
+ };
+
+ static const char *data_list[] = {
+ F_CIB_CALLDATA,
+ F_CIB_UPDATE,
+ F_CIB_UPDATE_RESULT
+ };
+
+ xmlNode *copy = create_xml_node(NULL, "copy");
+
+ CRM_ASSERT(copy != NULL);
+
+ for (lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) {
+ field = field_list[lpc];
+ value = crm_element_value(msg, field);
+ if (value != NULL) {
+ crm_xml_add(copy, field, value);
+ }
+ }
+ for (lpc = 0; with_data && lpc < PCMK__NELEM(data_list); lpc++) {
+ field = data_list[lpc];
+ value_struct = get_message_xml(msg, field);
+ if (value_struct != NULL) {
+ add_message_xml(copy, field, value_struct);
+ }
+ }
+
+ return copy;
+}
+
+cib_op_t *
+cib_op_func(int call_type)
+{
+ return &(cib_server_ops[call_type].fn);
+}
+
+gboolean
+cib_op_modifies(int call_type)
+{
+ return cib_server_ops[call_type].modifies_cib;
+}
+
+int
+cib_op_can_run(int call_type, int call_options, bool privileged)
+{
+ if (!privileged && cib_server_ops[call_type].needs_privileges) {
+ return -EACCES;
+ }
+ return pcmk_ok;
+}
+
+int
+cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section)
+{
+ crm_trace("Prepare %d", call_type);
+ return cib_server_ops[call_type].prepare(request, input, section);
+}
+
+int
+cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output)
+{
+ crm_trace("Cleanup %d", call_type);
+ return cib_server_ops[call_type].cleanup(options, input, output);
+}
diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c
new file mode 100644
index 0000000..fc34f39
--- /dev/null
+++ b/daemons/based/based_io.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+
+#include <crm/cib.h>
+#include <crm/common/util.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cib/internal.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-based.h>
+
+crm_trigger_t *cib_writer = NULL;
+
+int write_cib_contents(gpointer p);
+
+static void
+cib_rename(const char *old)
+{
+ int new_fd;
+ char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root);
+
+ umask(S_IWGRP | S_IWOTH | S_IROTH);
+ new_fd = mkstemp(new);
+ crm_err("Archiving unusable file %s as %s", old, new);
+ if ((new_fd < 0) || (rename(old, new) < 0)) {
+ crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new);
+ crm_err("Disabling disk writes and continuing");
+ cib_writes_enabled = FALSE;
+ }
+ if (new_fd > 0) {
+ close(new_fd);
+ }
+ free(new);
+}
+
+/*
+ * It is the caller's responsibility to free the output of this function
+ */
+
+static xmlNode *
+retrieveCib(const char *filename, const char *sigfile)
+{
+ xmlNode *root = NULL;
+
+ crm_info("Reading cluster configuration file %s (digest: %s)",
+ filename, sigfile);
+ switch (cib_file_read_and_verify(filename, sigfile, &root)) {
+ case -pcmk_err_cib_corrupt:
+ crm_warn("Continuing but %s will NOT be used.", filename);
+ break;
+
+ case -pcmk_err_cib_modified:
+ /* Archive the original files so the contents are not lost */
+ crm_warn("Continuing but %s will NOT be used.", filename);
+ cib_rename(filename);
+ cib_rename(sigfile);
+ break;
+ }
+ return root;
+}
+
+/*
+ * for OSes without support for the dirent d_type member, like Solaris
+ */
+#ifndef DT_UNKNOWN
+# define DT_UNKNOWN 0
+# define DT_FIFO 1
+# define DT_CHR 2
+# define DT_DIR 4
+# define DT_BLK 6
+# define DT_REG 8
+# define DT_LNK 10
+# define DT_SOCK 12
+# define DT_WHT 14
+#endif /*DT_UNKNOWN*/
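+
+/* The fallback values above match the usual BSD/glibc DT_* constants, so the
+ * d_type logging in cib_archive_filter() is meaningful either way.
+ */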
+
+static int cib_archive_filter(const struct dirent * a)
+{
+ int rc = 0;
+ /* Looking for regular files (d_type = 8) starting with 'cib-' and not ending in .sig */
+ struct stat s;
+ char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name);
+
+ if(stat(a_path, &s) != 0) {
+ rc = errno;
+ crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_strerror(rc), rc);
+ rc = 0;
+
+ } else if ((s.st_mode & S_IFREG) != S_IFREG) {
+ unsigned char dtype;
+#ifdef HAVE_STRUCT_DIRENT_D_TYPE
+ dtype = a->d_type;
+#else
+ switch (s.st_mode & S_IFMT) {
+ case S_IFREG: dtype = DT_REG; break;
+ case S_IFDIR: dtype = DT_DIR; break;
+ case S_IFCHR: dtype = DT_CHR; break;
+ case S_IFBLK: dtype = DT_BLK; break;
+ case S_IFLNK: dtype = DT_LNK; break;
+ case S_IFIFO: dtype = DT_FIFO; break;
+ case S_IFSOCK: dtype = DT_SOCK; break;
+ default: dtype = DT_UNKNOWN; break;
+ }
+#endif
+ crm_trace("%s - wrong type (%d)", a->d_name, dtype);
+
+ } else if(strstr(a->d_name, "cib-") != a->d_name) {
+ crm_trace("%s - wrong prefix", a->d_name);
+
+ } else if (pcmk__ends_with_ext(a->d_name, ".sig")) {
+ crm_trace("%s - wrong suffix", a->d_name);
+
+ } else {
+ crm_debug("%s - candidate", a->d_name);
+ rc = 1;
+ }
+
+ free(a_path);
+ return rc;
+}
+
+static int cib_archive_sort(const struct dirent ** a, const struct dirent **b)
+{
+    /* Sort by creation date, oldest first; readCibXmlFile() walks the
+     * resulting list backwards, so the newest archive is tried first
+     */
+ int rc = 0;
+ struct stat buf;
+
+ time_t a_age = 0;
+ time_t b_age = 0;
+
+ char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name);
+ char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name);
+
+ if(stat(a_path, &buf) == 0) {
+ a_age = buf.st_ctime;
+ }
+ if(stat(b_path, &buf) == 0) {
+ b_age = buf.st_ctime;
+ }
+
+ free(a_path);
+ free(b_path);
+
+ if(a_age > b_age) {
+ rc = 1;
+ } else if(a_age < b_age) {
+ rc = -1;
+ }
+
+ crm_trace("%s (%lu) vs. %s (%lu) : %d",
+ a[0]->d_name, (unsigned long)a_age,
+ b[0]->d_name, (unsigned long)b_age, rc);
+ return rc;
+}
+
+xmlNode *
+readCibXmlFile(const char *dir, const char *file, gboolean discard_status)
+{
+ struct dirent **namelist = NULL;
+
+ int lpc = 0;
+ char *sigfile = NULL;
+ char *sigfilepath = NULL;
+ char *filename = NULL;
+ const char *name = NULL;
+ const char *value = NULL;
+ const char *validation = NULL;
+ const char *use_valgrind = getenv("PCMK_valgrind_enabled");
+
+ xmlNode *root = NULL;
+ xmlNode *status = NULL;
+
+ sigfile = crm_strdup_printf("%s.sig", file);
+ if (pcmk__daemon_can_write(dir, file) == FALSE
+ || pcmk__daemon_can_write(dir, sigfile) == FALSE) {
+ cib_status = -EACCES;
+ return NULL;
+ }
+
+ filename = crm_strdup_printf("%s/%s", dir, file);
+ sigfilepath = crm_strdup_printf("%s/%s", dir, sigfile);
+ free(sigfile);
+
+ cib_status = pcmk_ok;
+ root = retrieveCib(filename, sigfilepath);
+ free(filename);
+ free(sigfilepath);
+
+ if (root == NULL) {
+ crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root);
+ lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort);
+ if (lpc < 0) {
+ crm_perror(LOG_NOTICE, "scandir(%s) failed", cib_root);
+ }
+ }
+
+ while (root == NULL && lpc > 1) {
+ crm_debug("Testing %d candidates", lpc);
+
+ lpc--;
+
+ filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name);
+ sigfile = crm_strdup_printf("%s.sig", filename);
+
+ crm_info("Reading cluster configuration file %s (digest: %s)",
+ filename, sigfile);
+ if (cib_file_read_and_verify(filename, sigfile, &root) < 0) {
+ crm_warn("Continuing but %s will NOT be used.", filename);
+ } else {
+ crm_notice("Continuing with last valid configuration archive: %s", filename);
+ }
+
+ free(namelist[lpc]);
+ free(filename);
+ free(sigfile);
+ }
+ free(namelist);
+
+ if (root == NULL) {
+ root = createEmptyCib(0);
+ crm_warn("Continuing with an empty configuration.");
+ }
+
+ if (cib_writes_enabled && use_valgrind &&
+ (crm_is_true(use_valgrind) || strstr(use_valgrind, "pacemaker-based"))) {
+
+ cib_writes_enabled = FALSE;
+ crm_err("*** Disabling disk writes to avoid confusing Valgrind ***");
+ }
+
+ status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE);
+ if (discard_status && status != NULL) {
+ /* strip out the status section if there is one */
+ free_xml(status);
+ status = NULL;
+ }
+ if (status == NULL) {
+ create_xml_node(root, XML_CIB_TAG_STATUS);
+ }
+
+ /* Do this before schema validation happens */
+
+ /* fill in some defaults */
+ name = XML_ATTR_GENERATION_ADMIN;
+ value = crm_element_value(root, name);
+ if (value == NULL) {
+ crm_warn("No value for %s was specified in the configuration.", name);
+        crm_warn("The recommended course of action is to shut down,"
+ " run crm_verify and fix any errors it reports.");
+ crm_warn("We will default to zero and continue but may get"
+ " confused about which configuration to use if"
+ " multiple nodes are powered up at the same time.");
+ crm_xml_add_int(root, name, 0);
+ }
+
+ name = XML_ATTR_GENERATION;
+ value = crm_element_value(root, name);
+ if (value == NULL) {
+ crm_xml_add_int(root, name, 0);
+ }
+
+ name = XML_ATTR_NUMUPDATES;
+ value = crm_element_value(root, name);
+ if (value == NULL) {
+ crm_xml_add_int(root, name, 0);
+ }
+
+ // Unset (DC should set appropriate value)
+ xml_remove_prop(root, XML_ATTR_DC_UUID);
+
+ if (discard_status) {
+ crm_log_xml_trace(root, "[on-disk]");
+ }
+
+ validation = crm_element_value(root, XML_ATTR_VALIDATION);
+ if (validate_xml(root, NULL, TRUE) == FALSE) {
+ crm_err("CIB does not validate with %s",
+ pcmk__s(validation, "no schema specified"));
+ cib_status = -pcmk_err_schema_validation;
+
+ } else if (validation == NULL) {
+ int version = 0;
+
+ update_validation(&root, &version, 0, FALSE, FALSE);
+ if (version > 0) {
+ crm_notice("Enabling %s validation on"
+ " the existing (sane) configuration", get_schema_name(version));
+ } else {
+ crm_err("CIB does not validate with any known schema");
+ cib_status = -pcmk_err_schema_validation;
+ }
+ }
+
+ return root;
+}
+
+gboolean
+uninitializeCib(void)
+{
+ xmlNode *tmp_cib = the_cib;
+
+ if (tmp_cib == NULL) {
+ crm_debug("The CIB has already been deallocated.");
+ return FALSE;
+ }
+
+ the_cib = NULL;
+
+ crm_debug("Deallocating the CIB.");
+
+ free_xml(tmp_cib);
+
+ crm_debug("The CIB has been deallocated.");
+
+ return TRUE;
+}
+
+/*
+ * This method will free the old CIB pointer on success and the new one
+ * on failure.
+ */
+int
+activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op)
+{
+ if (new_cib) {
+ xmlNode *saved_cib = the_cib;
+
+ CRM_ASSERT(new_cib != saved_cib);
+ the_cib = new_cib;
+ free_xml(saved_cib);
+ if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) {
+ crm_debug("Triggering CIB write for %s op", op);
+ mainloop_set_trigger(cib_writer);
+ }
+ return pcmk_ok;
+ }
+
+ crm_err("Ignoring invalid CIB");
+ if (the_cib) {
+ crm_warn("Reverting to last known CIB");
+ } else {
+ crm_crit("Could not write out new CIB and no saved version to revert to");
+ }
+ return -ENODATA;
+}
+
+static void
+cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
+{
+ const char *errmsg = "Could not write CIB to disk";
+
+ if ((exitcode != 0) && cib_writes_enabled) {
+ cib_writes_enabled = FALSE;
+ errmsg = "Disabling CIB disk writes after failure";
+ }
+
+ if ((signo == 0) && (exitcode == 0)) {
+ crm_trace("Disk write [%d] succeeded", (int) pid);
+
+ } else if (signo == 0) {
+ crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode);
+
+ } else {
+ crm_err("%s: process %d terminated with signal %d (%s)%s",
+ errmsg, (int) pid, signo, strsignal(signo),
+ (core? " and dumped core" : ""));
+ }
+
+ mainloop_trigger_complete(cib_writer);
+}
+
+int
+write_cib_contents(gpointer p)
+{
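+    /* Two entry modes: called directly with a CIB (p != NULL) for a
+     * synchronous write, or as the cib_writer trigger callback (p == NULL),
+     * in which case we fork and the child performs the write.
+     */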
+ int exit_rc = pcmk_ok;
+ xmlNode *cib_local = NULL;
+
+ /* Make a copy of the CIB to write (possibly in a forked child) */
+ if (p) {
+ /* Synchronous write out */
+ cib_local = copy_xml(p);
+
+ } else {
+ int pid = 0;
+ int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0);
+
+ /* Turn it off before the fork() to avoid:
+ * - 2 processes writing to the same shared mem
+ * - the child needing to disable it
+ * (which would close it from underneath the parent)
+ * This way, the shared mem files are already closed
+ */
+ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
+
+ pid = fork();
+ if (pid < 0) {
+ crm_perror(LOG_ERR, "Disabling disk writes after fork failure");
+ cib_writes_enabled = FALSE;
+ return FALSE;
+ }
+
+ if (pid) {
+ /* Parent */
+ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete);
+ if (bb_state == QB_LOG_STATE_ENABLED) {
+                /* Re-enable now that it is safe */
+ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE);
+ }
+
+ return -1; /* -1 means 'still work to do' */
+ }
+
+ /* Asynchronous write-out after a fork() */
+
+ /* In theory, we can scribble on the_cib here and not affect the parent,
+ * but let's be safe anyway.
+ */
+ cib_local = copy_xml(the_cib);
+ }
+
+ /* Write the CIB */
+ exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml");
+
+ /* A nonzero exit code will cause further writes to be disabled */
+ free_xml(cib_local);
+ if (p == NULL) {
+ crm_exit_t exit_code = CRM_EX_OK;
+
+ switch (exit_rc) {
+ case pcmk_ok:
+ exit_code = CRM_EX_OK;
+ break;
+ case pcmk_err_cib_modified:
+ exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest
+ break;
+ case pcmk_err_cib_backup: // Existing CIB couldn't be backed up
+ case pcmk_err_cib_save: // New CIB couldn't be saved
+ exit_code = CRM_EX_CANTCREAT;
+ break;
+ default:
+ exit_code = CRM_EX_ERROR;
+ break;
+ }
+
+ /* Use _exit() because exit() could affect the parent adversely */
+ _exit(exit_code);
+ }
+ return exit_rc;
+}
diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c
new file mode 100644
index 0000000..d46456c
--- /dev/null
+++ b/daemons/based/based_messages.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <crm/crm.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+
+#include <crm/common/xml.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/xml_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <pacemaker-based.h>
+
+/* Maximum number of diffs to ignore while waiting for a resync */
+#define MAX_DIFF_RETRY 5
+
+bool based_is_primary = false;
+
+xmlNode *the_cib = NULL;
+
+int
+cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ const char *host = crm_element_value(req, F_ORIG);
+
+ *answer = NULL;
+
+ if (crm_element_value(req, F_CIB_ISREPLY) == NULL) {
+ crm_info("Peer %s is requesting to shut down", host);
+ return pcmk_ok;
+ }
+
+ if (cib_shutdown_flag == FALSE) {
+ crm_err("Peer %s mistakenly thinks we wanted to shut down", host);
+ return -EINVAL;
+ }
+
+ crm_info("Peer %s has acknowledged our shutdown request", host);
+ terminate_cib(__func__, 0);
+ return pcmk_ok;
+}
+
+int
+cib_process_default(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ int result = pcmk_ok;
+
+ crm_trace("Processing \"%s\" event", op);
+ *answer = NULL;
+
+ if (op == NULL) {
+ result = -EINVAL;
+ crm_err("No operation specified");
+
+ } else if (strcmp(PCMK__CIB_REQUEST_NOOP, op) != 0) {
+ result = -EPROTONOSUPPORT;
+ crm_err("Action [%s] is not supported by the CIB manager", op);
+ }
+ return result;
+}
+
+int
+cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ int result = pcmk_ok;
+
+ crm_trace("Processing \"%s\" event", op);
+
+ if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) {
+ if (based_is_primary) {
+ result = pcmk_ok;
+ } else {
+ result = -EPERM;
+ }
+ return result;
+ }
+
+ if (pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) {
+ if (!based_is_primary) {
+ crm_info("We are now in R/W mode");
+ based_is_primary = true;
+ } else {
+ crm_debug("We are still in R/W mode");
+ }
+
+ } else if (based_is_primary) {
+ crm_info("We are now in R/O mode");
+ based_is_primary = false;
+ }
+
+ return result;
+}
+
+/* Set to 1 when a sync is requested, incremented when a diff is ignored,
+ * reset to 0 when a sync is received
+ */
+static int sync_in_progress = 0;
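+
+/* Lifecycle: send_sync_request() sets this to 1; cib_server_process_diff()
+ * increments it for each diff ignored while the sync is pending and gives up
+ * after MAX_DIFF_RETRY, so a lost sync reply cannot stall diff processing
+ * forever. A successful replacement in cib_process_replace_svr() resets it
+ * to 0.
+ */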
+
+void
+send_sync_request(const char *host)
+{
+ xmlNode *sync_me = create_xml_node(NULL, "sync-me");
+
+ crm_info("Requesting re-sync from %s", (host? host : "all peers"));
+ sync_in_progress = 1;
+
+ crm_xml_add(sync_me, F_TYPE, "cib");
+ crm_xml_add(sync_me, F_CIB_OPERATION, PCMK__CIB_REQUEST_SYNC_TO_ONE);
+ crm_xml_add(sync_me, F_CIB_DELEGATED,
+ stand_alone? "localhost" : crm_cluster->uname);
+
+ send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE);
+ free_xml(sync_me);
+}
+
+int
+cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
+ xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
+{
+ const char *host = crm_element_value(req, F_ORIG);
+ const char *seq = crm_element_value(req, F_CIB_PING_ID);
+ char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
+
+ crm_trace("Processing \"%s\" event %s from %s", op, seq, host);
+ *answer = create_xml_node(NULL, XML_CRM_TAG_PING);
+
+ crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ crm_xml_add(*answer, XML_ATTR_DIGEST, digest);
+ crm_xml_add(*answer, F_CIB_PING_ID, seq);
+
+ pcmk__if_tracing(
+ {
+ // Append additional detail so the receiver can log the differences
+ add_message_xml(*answer, F_CIB_CALLDATA, the_cib);
+ },
+ {
+ // Always include at least the version details
+ const char *tag = TYPE(the_cib);
+ xmlNode *shallow = create_xml_node(NULL, tag);
+
+ copy_in_properties(shallow, the_cib);
+ add_message_xml(*answer, F_CIB_CALLDATA, shallow);
+ free_xml(shallow);
+ }
+ );
+
+ crm_info("Reporting our current digest to %s: %s for %s.%s.%s",
+ host, digest,
+ crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN),
+ crm_element_value(existing_cib, XML_ATTR_GENERATION),
+ crm_element_value(existing_cib, XML_ATTR_NUMUPDATES));
+
+ free(digest);
+
+ return pcmk_ok;
+}
+
+int
+cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
+ xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
+{
+ return sync_our_cib(req, TRUE);
+}
+
+int
+cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
+ xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
+{
+ int rc = pcmk_ok;
+
+ *answer = NULL;
+
+ if(crm_element_value(req, F_CIB_SCHEMA_MAX)) {
+ /* The originator of an upgrade request sends it to the DC, without
+ * F_CIB_SCHEMA_MAX. If an upgrade is needed, the DC re-broadcasts the
+ * request with F_CIB_SCHEMA_MAX, and each node performs the upgrade
+ * (and notifies its local clients) here.
+ */
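+        /* Round-trip sketch:
+         *   1. originator -> DC: upgrade request without F_CIB_SCHEMA_MAX
+         *      (handled in the else branch below)
+         *   2. DC -> all peers: re-broadcast with F_CIB_SCHEMA_MAX set to
+         *      the target schema name
+         *   3. every node: this branch runs the upgrade and notifies its
+         *      local clients
+         */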
+ return cib_process_upgrade(
+ op, options, section, req, input, existing_cib, result_cib, answer);
+
+ } else {
+ int new_version = 0;
+ int current_version = 0;
+ xmlNode *scratch = copy_xml(existing_cib);
+ const char *host = crm_element_value(req, F_ORIG);
+ const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION);
+ const char *client_id = crm_element_value(req, F_CIB_CLIENTID);
+ const char *call_opts = crm_element_value(req, F_CIB_CALLOPTS);
+ const char *call_id = crm_element_value(req, F_CIB_CALLID);
+
+ crm_trace("Processing \"%s\" event", op);
+ if (value != NULL) {
+ current_version = get_schema_version(value);
+ }
+
+ rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE);
+ if (new_version > current_version) {
+ xmlNode *up = create_xml_node(NULL, __func__);
+
+ rc = pcmk_ok;
+ crm_notice("Upgrade request from %s verified", host);
+
+ crm_xml_add(up, F_TYPE, "cib");
+ crm_xml_add(up, F_CIB_OPERATION, PCMK__CIB_REQUEST_UPGRADE);
+ crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version));
+ crm_xml_add(up, F_CIB_DELEGATED, host);
+ crm_xml_add(up, F_CIB_CLIENTID, client_id);
+ crm_xml_add(up, F_CIB_CALLOPTS, call_opts);
+ crm_xml_add(up, F_CIB_CALLID, call_id);
+
+ if (cib_legacy_mode() && based_is_primary) {
+ rc = cib_process_upgrade(
+ op, options, section, up, input, existing_cib, result_cib, answer);
+
+ } else {
+ send_cluster_message(NULL, crm_msg_cib, up, FALSE);
+ }
+
+ free_xml(up);
+
+ } else if(rc == pcmk_ok) {
+ rc = -pcmk_err_schema_unchanged;
+ }
+
+ if (rc != pcmk_ok) {
+ // Notify originating peer so it can notify its local clients
+ crm_node_t *origin = pcmk__search_cluster_node_cache(0, host);
+
+ crm_info("Rejecting upgrade request from %s: %s "
+ CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc,
+ (origin? origin->uname : "lost"));
+
+ if (origin) {
+ xmlNode *up = create_xml_node(NULL, __func__);
+
+ crm_xml_add(up, F_TYPE, "cib");
+ crm_xml_add(up, F_CIB_OPERATION, PCMK__CIB_REQUEST_UPGRADE);
+ crm_xml_add(up, F_CIB_DELEGATED, host);
+ crm_xml_add(up, F_CIB_ISREPLY, host);
+ crm_xml_add(up, F_CIB_CLIENTID, client_id);
+ crm_xml_add(up, F_CIB_CALLOPTS, call_opts);
+ crm_xml_add(up, F_CIB_CALLID, call_id);
+ crm_xml_add_int(up, F_CIB_UPGRADE_RC, rc);
+ if (send_cluster_message(origin, crm_msg_cib, up, TRUE)
+ == FALSE) {
+ crm_warn("Could not send CIB upgrade result to %s", host);
+ }
+ free_xml(up);
+ }
+ }
+ free_xml(scratch);
+ }
+ return rc;
+}
+
+int
+cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ return sync_our_cib(req, FALSE);
+}
+
+int
+cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ int rc = pcmk_ok;
+
+ if (sync_in_progress > MAX_DIFF_RETRY) {
+ /* Don't ignore diffs forever; the last request may have been lost.
+ * If the diff fails, we'll ask for another full resync.
+ */
+ sync_in_progress = 0;
+ }
+
+ // The primary instance should never ignore a diff
+ if (sync_in_progress && !based_is_primary) {
+ int diff_add_updates = 0;
+ int diff_add_epoch = 0;
+ int diff_add_admin_epoch = 0;
+
+ int diff_del_updates = 0;
+ int diff_del_epoch = 0;
+ int diff_del_admin_epoch = 0;
+
+ cib_diff_version_details(input,
+ &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
+ &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
+
+ sync_in_progress++;
+ crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)",
+ diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
+ diff_add_admin_epoch, diff_add_epoch, diff_add_updates);
+ return -pcmk_err_diff_resync;
+ }
+
+ rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer);
+ crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc,
+ (based_is_primary? "primary": "secondary"));
+
+ if ((rc == -pcmk_err_diff_resync) && !based_is_primary) {
+ free_xml(*result_cib);
+ *result_cib = NULL;
+ send_sync_request(NULL);
+
+ } else if (rc == -pcmk_err_diff_resync) {
+ rc = -pcmk_err_diff_failed;
+ if (options & cib_force_diff) {
+ crm_warn("Not requesting full refresh in R/W mode");
+ }
+
+ } else if ((rc != pcmk_ok) && !based_is_primary && cib_legacy_mode()) {
+ crm_warn("Requesting full CIB refresh because update failed: %s"
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+
+ pcmk__output_set_log_level(logger_out, LOG_INFO);
+ logger_out->message(logger_out, "xml-patchset", input);
+ free_xml(*result_cib);
+ *result_cib = NULL;
+ send_sync_request(NULL);
+ }
+
+ return rc;
+}
+
+int
+cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ const char *tag = crm_element_name(input);
+ int rc =
+ cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer);
+ if (rc == pcmk_ok && pcmk__str_eq(tag, XML_TAG_CIB, pcmk__str_casei)) {
+ sync_in_progress = 0;
+ }
+ return rc;
+}
+
+int
+cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req,
+ xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
+ xmlNode ** answer)
+{
+ return -EINVAL;
+}
+
+int
+sync_our_cib(xmlNode * request, gboolean all)
+{
+ int result = pcmk_ok;
+ char *digest = NULL;
+ const char *host = crm_element_value(request, F_ORIG);
+ const char *op = crm_element_value(request, F_CIB_OPERATION);
+
+ xmlNode *replace_request = NULL;
+
+ CRM_CHECK(the_cib != NULL, return -EINVAL);
+
+ replace_request = cib_msg_copy(request, FALSE);
+ CRM_CHECK(replace_request != NULL, return -EINVAL);
+
+ crm_debug("Syncing CIB to %s", all ? "all peers" : host);
+ if (all == FALSE && host == NULL) {
+ crm_log_xml_err(request, "bad sync");
+ }
+
+    /* Set F_CIB_ISREPLY whenever we know the requesting host, not only when
+     * all == FALSE: without it, a sync_from request's local client was never
+     * notified, because the response was not recognized as a reply. Setting
+     * it does not prevent the other nodes from applying the replacement when
+     * all == TRUE.
+     */
+ if (host != NULL) {
+ crm_xml_add(replace_request, F_CIB_ISREPLY, host);
+ }
+ if (all) {
+ xml_remove_prop(replace_request, F_CIB_HOST);
+ }
+
+ crm_xml_add(replace_request, F_CIB_OPERATION, PCMK__CIB_REQUEST_REPLACE);
+ crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op);
+ pcmk__xe_set_bool_attr(replace_request, F_CIB_GLOBAL_UPDATE, true);
+
+ crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
+ crm_xml_add(replace_request, XML_ATTR_DIGEST, digest);
+
+ add_message_xml(replace_request, F_CIB_CALLDATA, the_cib);
+
+ if (send_cluster_message
+ (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) {
+ result = -ENOTCONN;
+ }
+ free_xml(replace_request);
+ free(digest);
+ return result;
+}
diff --git a/daemons/based/based_notify.c b/daemons/based/based_notify.c
new file mode 100644
index 0000000..5881f6d
--- /dev/null
+++ b/daemons/based/based_notify.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <inttypes.h> // PRIx64
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+
+#include <crm/common/xml.h>
+#include <crm/common/remote_internal.h>
+#include <pacemaker-based.h>
+
+struct cib_notification_s {
+ xmlNode *msg;
+ struct iovec *iov;
+ int32_t iov_size;
+};
+
+static void
+cib_notify_send_one(gpointer key, gpointer value, gpointer user_data)
+{
+ const char *type = NULL;
+ gboolean do_send = FALSE;
+ int rc = pcmk_rc_ok;
+
+ pcmk__client_t *client = value;
+ struct cib_notification_s *update = user_data;
+
+ if (client->ipcs == NULL && client->remote == NULL) {
+ crm_warn("Skipping client with NULL channel");
+ return;
+ }
+
+ type = crm_element_value(update->msg, F_SUBTYPE);
+ CRM_LOG_ASSERT(type != NULL);
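+
+    /* Each check below pairs one client subscription flag with one message
+     * subtype (e.g. cib_notify_diff with T_CIB_DIFF_NOTIFY); the pre-built
+     * notification goes only to clients subscribed to that subtype.
+     */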
+
+ if (pcmk_is_set(client->flags, cib_notify_diff)
+ && pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) {
+
+ do_send = TRUE;
+
+ } else if (pcmk_is_set(client->flags, cib_notify_replace)
+ && pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) {
+ do_send = TRUE;
+
+ } else if (pcmk_is_set(client->flags, cib_notify_confirm)
+ && pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) {
+ do_send = TRUE;
+
+ } else if (pcmk_is_set(client->flags, cib_notify_pre)
+ && pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) {
+ do_send = TRUE;
+
+ } else if (pcmk_is_set(client->flags, cib_notify_post)
+ && pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) {
+
+ do_send = TRUE;
+ }
+
+ if (do_send) {
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_ipc:
+ rc = pcmk__ipc_send_iov(client, update->iov,
+ crm_ipc_server_event);
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not notify client %s: %s " CRM_XS " id=%s",
+ pcmk__client_name(client), pcmk_rc_str(rc),
+ client->id);
+ }
+ break;
+#ifdef HAVE_GNUTLS_GNUTLS_H
+ case pcmk__client_tls:
+#endif
+ case pcmk__client_tcp:
+ crm_debug("Sent %s notification to client %s (id %s)",
+ type, pcmk__client_name(client), client->id);
+ pcmk__remote_send_xml(client->remote, update->msg);
+ break;
+ default:
+ crm_err("Unknown transport for client %s "
+ CRM_XS " flags=%#016" PRIx64,
+ pcmk__client_name(client), client->flags);
+ }
+ }
+}
+
+static void
+cib_notify_send(xmlNode * xml)
+{
+ struct iovec *iov;
+ struct cib_notification_s update;
+
+ ssize_t bytes = 0;
+ int rc = pcmk__ipc_prepare_iov(0, xml, 0, &iov, &bytes);
+
+ if (rc == pcmk_rc_ok) {
+ update.msg = xml;
+ update.iov = iov;
+ update.iov_size = bytes;
+ pcmk__foreach_ipc_client(cib_notify_send_one, &update);
+
+ } else {
+ crm_notice("Could not notify clients: %s " CRM_XS " rc=%d",
+ pcmk_rc_str(rc), rc);
+ }
+ pcmk_free_ipc_event(iov);
+}
+
+static void
+attach_cib_generation(xmlNode *msg, const char *field, xmlNode *a_cib)
+{
+ xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
+
+ if (a_cib != NULL) {
+ copy_in_properties(generation, a_cib);
+ }
+ add_message_xml(msg, field, generation);
+ free_xml(generation);
+}
+
+void
+cib_diff_notify(const char *op, int result, const char *call_id,
+ const char *client_id, const char *client_name,
+ const char *origin, xmlNode *update, xmlNode *diff)
+{
+ int add_updates = 0;
+ int add_epoch = 0;
+ int add_admin_epoch = 0;
+
+ int del_updates = 0;
+ int del_epoch = 0;
+ int del_admin_epoch = 0;
+
+ uint8_t log_level = LOG_TRACE;
+
+ xmlNode *update_msg = NULL;
+ const char *type = NULL;
+
+ if (diff == NULL) {
+ return;
+ }
+
+ if (result != pcmk_ok) {
+ log_level = LOG_WARNING;
+ }
+
+ cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates,
+ &del_admin_epoch, &del_epoch, &del_updates);
+
+ if ((add_admin_epoch != del_admin_epoch)
+ || (add_epoch != del_epoch)
+ || (add_updates != del_updates)) {
+
+ do_crm_log(log_level,
+ "Updated CIB generation %d.%d.%d to %d.%d.%d from client "
+ "%s%s%s (%s) (%s)",
+ del_admin_epoch, del_epoch, del_updates,
+ add_admin_epoch, add_epoch, add_updates,
+ client_name,
+ ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""),
+ pcmk__s(origin, "unspecified peer"), pcmk_strerror(result));
+
+ } else if ((add_admin_epoch != 0)
+ || (add_epoch != 0)
+ || (add_updates != 0)) {
+
+ do_crm_log(log_level,
+ "Local-only change to CIB generation %d.%d.%d from client "
+ "%s%s%s (%s) (%s)",
+ add_admin_epoch, add_epoch, add_updates,
+ client_name,
+ ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""),
+ pcmk__s(origin, "unspecified peer"), pcmk_strerror(result));
+ }
+
+ update_msg = create_xml_node(NULL, "notify");
+
+ crm_xml_add(update_msg, F_TYPE, T_CIB_NOTIFY);
+ crm_xml_add(update_msg, F_SUBTYPE, T_CIB_DIFF_NOTIFY);
+ crm_xml_add(update_msg, F_CIB_OPERATION, op);
+ crm_xml_add(update_msg, F_CIB_CLIENTID, client_id);
+ crm_xml_add(update_msg, F_CIB_CALLID, call_id);
+ crm_xml_add(update_msg, F_ORIG, origin);
+ crm_xml_add_int(update_msg, F_CIB_RC, result);
+
+ if (update != NULL) {
+ type = crm_element_name(update);
+ crm_trace("Setting type to update->name: %s", type);
+ } else {
+        type = crm_element_name(diff);
+        crm_trace("Setting type to diff->name: %s", type);
+ }
+ crm_xml_add(update_msg, F_CIB_OBJID, ID(diff));
+ crm_xml_add(update_msg, F_CIB_OBJTYPE, type);
+ attach_cib_generation(update_msg, "cib_generation", the_cib);
+
+ if (update != NULL) {
+ add_message_xml(update_msg, F_CIB_UPDATE, update);
+ }
+ add_message_xml(update_msg, F_CIB_UPDATE_RESULT, diff);
+
+ cib_notify_send(update_msg);
+ free_xml(update_msg);
+}
+
+void
+cib_replace_notify(const char *op, int result, const char *call_id,
+ const char *client_id, const char *client_name,
+ const char *origin, xmlNode *update, xmlNode *diff,
+ uint32_t change_section)
+{
+ xmlNode *replace_msg = NULL;
+
+ int add_updates = 0;
+ int add_epoch = 0;
+ int add_admin_epoch = 0;
+
+ int del_updates = 0;
+ int del_epoch = 0;
+ int del_admin_epoch = 0;
+
+ uint8_t log_level = LOG_INFO;
+
+ if (diff == NULL) {
+ return;
+ }
+
+ if (result != pcmk_ok) {
+ log_level = LOG_WARNING;
+ }
+
+ cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates,
+ &del_admin_epoch, &del_epoch, &del_updates);
+
+ if (del_updates < 0) {
+ crm_log_xml_debug(diff, "Bad replace diff");
+ }
+
+ if ((add_admin_epoch != del_admin_epoch)
+ || (add_epoch != del_epoch)
+ || (add_updates != del_updates)) {
+
+ do_crm_log(log_level,
+ "Replaced CIB generation %d.%d.%d with %d.%d.%d from client "
+ "%s%s%s (%s) (%s)",
+ del_admin_epoch, del_epoch, del_updates,
+ add_admin_epoch, add_epoch, add_updates,
+ client_name,
+ ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""),
+ pcmk__s(origin, "unspecified peer"), pcmk_strerror(result));
+
+ } else if ((add_admin_epoch != 0)
+ || (add_epoch != 0)
+ || (add_updates != 0)) {
+
+ do_crm_log(log_level,
+ "Local-only replace of CIB generation %d.%d.%d from client "
+ "%s%s%s (%s) (%s)",
+ add_admin_epoch, add_epoch, add_updates,
+ client_name,
+ ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""),
+ pcmk__s(origin, "unspecified peer"), pcmk_strerror(result));
+ }
+
+ replace_msg = create_xml_node(NULL, "notify-replace");
+
+ crm_xml_add(replace_msg, F_TYPE, T_CIB_NOTIFY);
+ crm_xml_add(replace_msg, F_SUBTYPE, T_CIB_REPLACE_NOTIFY);
+ crm_xml_add(replace_msg, F_CIB_OPERATION, op);
+ crm_xml_add(replace_msg, F_CIB_CLIENTID, client_id);
+ crm_xml_add(replace_msg, F_CIB_CALLID, call_id);
+ crm_xml_add(replace_msg, F_ORIG, origin);
+ crm_xml_add_int(replace_msg, F_CIB_RC, result);
+ crm_xml_add_ll(replace_msg, F_CIB_CHANGE_SECTION,
+ (long long) change_section);
+ attach_cib_generation(replace_msg, "cib-replace-generation", update);
+
+ /* We can include update and diff if a replace callback needs them. Until
+ * then, avoid the overhead.
+ */
+
+ crm_log_xml_trace(replace_msg, "CIB replaced");
+
+ cib_notify_send(replace_msg);
+ free_xml(replace_msg);
+}
diff --git a/daemons/based/based_remote.c b/daemons/based/based_remote.c
new file mode 100644
index 0000000..38136d2
--- /dev/null
+++ b/daemons/based/based_remote.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright 2004-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <inttypes.h> // PRIx64
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <netinet/ip.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <glib.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/remote_internal.h>
+#include <crm/cib/internal.h>
+
+#include "pacemaker-based.h"
+
+/* #undef HAVE_PAM_PAM_APPL_H */
+/* #undef HAVE_GNUTLS_GNUTLS_H */
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+# include <gnutls/gnutls.h>
+#endif
+
+#include <pwd.h>
+#include <grp.h>
+#if HAVE_SECURITY_PAM_APPL_H
+# include <security/pam_appl.h>
+# define HAVE_PAM 1
+#else
+# if HAVE_PAM_PAM_APPL_H
+# include <pam/pam_appl.h>
+# define HAVE_PAM 1
+# endif
+#endif
+
+extern int remote_tls_fd;
+extern gboolean cib_shutdown_flag;
+
+int init_remote_listener(int port, gboolean encrypted);
+void cib_remote_connection_destroy(gpointer user_data);
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+gnutls_dh_params_t dh_params;
+gnutls_anon_server_credentials_t anon_cred_s;
+static void
+debug_log(int level, const char *str)
+{
+ fputs(str, stderr);
+}
+#endif
+
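+// Window (in ms) that a connecting remote client has to authenticate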
+#define REMOTE_AUTH_TIMEOUT 10000
+
+int num_clients;
+int authenticate_user(const char *user, const char *passwd);
+static int cib_remote_listen(gpointer data);
+static int cib_remote_msg(gpointer data);
+
+static void
+remote_connection_destroy(gpointer user_data)
+{
+ crm_info("No longer listening for remote connections");
+ return;
+}
+
+int
+init_remote_listener(int port, gboolean encrypted)
+{
+ int rc;
+ int *ssock = NULL;
+ struct sockaddr_in saddr;
+ int optval;
+
+ static struct mainloop_fd_callbacks remote_listen_fd_callbacks = {
+ .dispatch = cib_remote_listen,
+ .destroy = remote_connection_destroy,
+ };
+
+ if (port <= 0) {
+ /* don't start it */
+ return 0;
+ }
+
+ if (encrypted) {
+#ifndef HAVE_GNUTLS_GNUTLS_H
+ crm_warn("TLS support is not available");
+ return 0;
+#else
+ crm_notice("Starting TLS listener on port %d", port);
+ crm_gnutls_global_init();
+ /* gnutls_global_set_log_level (10); */
+ gnutls_global_set_log_function(debug_log);
+ if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) {
+ return -1;
+ }
+ gnutls_anon_allocate_server_credentials(&anon_cred_s);
+ gnutls_anon_set_server_dh_params(anon_cred_s, dh_params);
+#endif
+ } else {
+ crm_warn("Starting plain-text listener on port %d", port);
+ }
+#ifndef HAVE_PAM
+ crm_warn("PAM is _not_ enabled!");
+#endif
+
+ /* create server socket */
+ ssock = malloc(sizeof(int));
+ if(ssock == NULL) {
+ crm_perror(LOG_ERR, "Listener socket allocation failed");
+ return -1;
+ }
+
+ *ssock = socket(AF_INET, SOCK_STREAM, 0);
+ if (*ssock == -1) {
+ crm_perror(LOG_ERR, "Listener socket creation failed");
+ free(ssock);
+ return -1;
+ }
+
+ /* reuse address */
+ optval = 1;
+ rc = setsockopt(*ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
+ if (rc < 0) {
+ crm_perror(LOG_WARNING,
+ "Local address reuse not allowed on listener socket");
+ }
+
+ /* bind server socket */
+ memset(&saddr, '\0', sizeof(saddr));
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = INADDR_ANY;
+ saddr.sin_port = htons(port);
+ if (bind(*ssock, (struct sockaddr *)&saddr, sizeof(saddr)) == -1) {
+ crm_perror(LOG_ERR, "Cannot bind to listener socket");
+ close(*ssock);
+ free(ssock);
+ return -2;
+ }
+ if (listen(*ssock, 10) == -1) {
+ crm_perror(LOG_ERR, "Cannot listen on socket");
+ close(*ssock);
+ free(ssock);
+ return -3;
+ }
+
+ mainloop_add_fd("cib-remote", G_PRIORITY_DEFAULT, *ssock, ssock, &remote_listen_fd_callbacks);
+ crm_debug("Started listener on port %d", port);
+
+ return *ssock;
+}
+
+static int
+check_group_membership(const char *usr, const char *grp)
+{
+ int index = 0;
+ struct passwd *pwd = NULL;
+ struct group *group = NULL;
+
+ CRM_CHECK(usr != NULL, return FALSE);
+ CRM_CHECK(grp != NULL, return FALSE);
+
+ pwd = getpwnam(usr);
+ if (pwd == NULL) {
+ crm_err("No user named '%s' exists!", usr);
+ return FALSE;
+ }
+
+ group = getgrgid(pwd->pw_gid);
+ if (group != NULL && pcmk__str_eq(grp, group->gr_name, pcmk__str_none)) {
+ return TRUE;
+ }
+
+ group = getgrnam(grp);
+ if (group == NULL) {
+ crm_err("No group named '%s' exists!", grp);
+ return FALSE;
+ }
+
+ while (TRUE) {
+ char *member = group->gr_mem[index++];
+
+ if (member == NULL) {
+ break;
+
+ } else if (pcmk__str_eq(usr, member, pcmk__str_none)) {
+ return TRUE;
+ }
+    }
+
+ return FALSE;
+}
+
+static gboolean
+cib_remote_auth(xmlNode * login)
+{
+ const char *user = NULL;
+ const char *pass = NULL;
+ const char *tmp = NULL;
+
+ crm_log_xml_info(login, "Login: ");
+ if (login == NULL) {
+ return FALSE;
+ }
+
+ tmp = crm_element_name(login);
+ if (!pcmk__str_eq(tmp, "cib_command", pcmk__str_casei)) {
+ crm_err("Wrong tag: %s", tmp);
+ return FALSE;
+ }
+
+ tmp = crm_element_value(login, "op");
+ if (!pcmk__str_eq(tmp, "authenticate", pcmk__str_casei)) {
+ crm_err("Wrong operation: %s", tmp);
+ return FALSE;
+ }
+
+ user = crm_element_value(login, "user");
+ pass = crm_element_value(login, "password");
+
+ if (!user || !pass) {
+ crm_err("missing auth credentials");
+ return FALSE;
+ }
+
+ /* Non-root daemons can only validate the password of the
+ * user they're running as
+ */
+ if (check_group_membership(user, CRM_DAEMON_GROUP) == FALSE) {
+ crm_err("User is not a member of the required group");
+ return FALSE;
+
+ } else if (authenticate_user(user, pass) == FALSE) {
+ crm_err("PAM auth failed");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static gboolean
+remote_auth_timeout_cb(gpointer data)
+{
+ pcmk__client_t *client = data;
+
+ client->remote->auth_timeout = 0;
+
+ if (pcmk_is_set(client->flags, pcmk__client_authenticated)) {
+ return FALSE;
+ }
+
+ mainloop_del_fd(client->remote->source);
+ crm_err("Remote client authentication timed out");
+
+ return FALSE;
+}
+
+static int
+cib_remote_listen(gpointer data)
+{
+ int csock = 0;
+ unsigned laddr;
+ struct sockaddr_storage addr;
+ char ipstr[INET6_ADDRSTRLEN];
+ int ssock = *(int *)data;
+ int rc;
+
+ pcmk__client_t *new_client = NULL;
+
+ static struct mainloop_fd_callbacks remote_client_fd_callbacks = {
+ .dispatch = cib_remote_msg,
+ .destroy = cib_remote_connection_destroy,
+ };
+
+ /* accept the connection */
+ laddr = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ csock = accept(ssock, (struct sockaddr *)&addr, &laddr);
+ if (csock == -1) {
+ crm_perror(LOG_ERR, "Could not accept socket connection");
+ return TRUE;
+ }
+
+ pcmk__sockaddr2str(&addr, ipstr);
+ crm_debug("New %s connection from %s",
+ ((ssock == remote_tls_fd)? "secure" : "clear-text"), ipstr);
+
+ rc = pcmk__set_nonblocking(csock);
+ if (rc != pcmk_rc_ok) {
+ crm_err("Could not set socket non-blocking: %s " CRM_XS " rc=%d",
+ pcmk_rc_str(rc), rc);
+ close(csock);
+ return TRUE;
+ }
+
+ num_clients++;
+
+ new_client = pcmk__new_unauth_client(NULL);
+ new_client->remote = calloc(1, sizeof(pcmk__remote_t));
+
+ if (ssock == remote_tls_fd) {
+#ifdef HAVE_GNUTLS_GNUTLS_H
+ pcmk__set_client_flags(new_client, pcmk__client_tls);
+
+ /* create gnutls session for the server socket */
+ new_client->remote->tls_session = pcmk__new_tls_session(csock,
+ GNUTLS_SERVER,
+ GNUTLS_CRD_ANON,
+ anon_cred_s);
+ if (new_client->remote->tls_session == NULL) {
+ close(csock);
+ return TRUE;
+ }
+#endif
+ } else {
+ pcmk__set_client_flags(new_client, pcmk__client_tcp);
+ new_client->remote->tcp_socket = csock;
+ }
+
+ // Require the client to authenticate within this time
+ new_client->remote->auth_timeout = g_timeout_add(REMOTE_AUTH_TIMEOUT,
+ remote_auth_timeout_cb,
+ new_client);
+ crm_info("Remote CIB client pending authentication "
+ CRM_XS " %p id: %s", new_client, new_client->id);
+
+ new_client->remote->source =
+ mainloop_add_fd("cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client,
+ &remote_client_fd_callbacks);
+
+ return TRUE;
+}
+
+void
+cib_remote_connection_destroy(gpointer user_data)
+{
+ pcmk__client_t *client = user_data;
+ int csock = 0;
+
+ if (client == NULL) {
+ return;
+ }
+
+ crm_trace("Cleaning up after client %s disconnect",
+ pcmk__client_name(client));
+
+ num_clients--;
+ crm_trace("Num unfree'd clients: %d", num_clients);
+
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_tcp:
+ csock = client->remote->tcp_socket;
+ break;
+#ifdef HAVE_GNUTLS_GNUTLS_H
+ case pcmk__client_tls:
+ if (client->remote->tls_session) {
+ void *sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session);
+
+ csock = GPOINTER_TO_INT(sock_ptr);
+ if (pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+ gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_WR);
+ }
+ gnutls_deinit(*client->remote->tls_session);
+ gnutls_free(client->remote->tls_session);
+ client->remote->tls_session = NULL;
+ }
+ break;
+#endif
+ default:
+ crm_warn("Unknown transport for client %s "
+ CRM_XS " flags=%#016" PRIx64,
+ pcmk__client_name(client), client->flags);
+ }
+
+ if (csock > 0) {
+ close(csock);
+ }
+
+ pcmk__free_client(client);
+
+ crm_trace("Freed the cib client");
+
+ if (cib_shutdown_flag) {
+ cib_shutdown(0);
+ }
+ return;
+}
+
+static void
+cib_handle_remote_msg(pcmk__client_t *client, xmlNode *command)
+{
+ const char *value = NULL;
+
+ value = crm_element_name(command);
+ if (!pcmk__str_eq(value, "cib_command", pcmk__str_casei)) {
+ crm_log_xml_trace(command, "Bad command: ");
+ return;
+ }
+
+ if (client->name == NULL) {
+ value = crm_element_value(command, F_CLIENTNAME);
+ if (value == NULL) {
+ client->name = strdup(client->id);
+ } else {
+ client->name = strdup(value);
+ }
+ }
+
+ /* unset dangerous options */
+ xml_remove_prop(command, F_ORIG);
+ xml_remove_prop(command, F_CIB_HOST);
+ xml_remove_prop(command, F_CIB_GLOBAL_UPDATE);
+
+ crm_xml_add(command, F_TYPE, T_CIB);
+ crm_xml_add(command, F_CIB_CLIENTID, client->id);
+ crm_xml_add(command, F_CIB_CLIENTNAME, client->name);
+ crm_xml_add(command, F_CIB_USER, client->user);
+
+ if (crm_element_value(command, F_CIB_CALLID) == NULL) {
+ char *call_uuid = crm_generate_uuid();
+
+ /* fix the command */
+ crm_xml_add(command, F_CIB_CALLID, call_uuid);
+ free(call_uuid);
+ }
+
+ if (crm_element_value(command, F_CIB_CALLOPTS) == NULL) {
+ crm_xml_add_int(command, F_CIB_CALLOPTS, 0);
+ }
+
+ crm_log_xml_trace(command, "Remote command: ");
+ cib_common_callback_worker(0, 0, command, client, TRUE);
+}
+
+static int
+cib_remote_msg(gpointer data)
+{
+ xmlNode *command = NULL;
+ pcmk__client_t *client = data;
+ int rc;
+ int timeout = 1000;
+
+ if (pcmk_is_set(client->flags, pcmk__client_authenticated)) {
+ timeout = -1;
+ }
+
+ crm_trace("Remote %s message received for client %s",
+ pcmk__client_type_str(PCMK__CLIENT_TYPE(client)),
+ pcmk__client_name(client));
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+ if ((PCMK__CLIENT_TYPE(client) == pcmk__client_tls)
+ && !pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) {
+
+ int rc = pcmk__read_handshake_data(client);
+
+ if (rc == EAGAIN) {
+ /* No more data is available at the moment. Just return for now;
+ * we'll get invoked again once the client sends more.
+ */
+ return 0;
+ } else if (rc != pcmk_rc_ok) {
+ return -1;
+ }
+
+ crm_debug("TLS handshake with remote CIB client completed");
+ pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete);
+ if (client->remote->auth_timeout) {
+ g_source_remove(client->remote->auth_timeout);
+ }
+
+ // Require the client to authenticate within this time
+ client->remote->auth_timeout = g_timeout_add(REMOTE_AUTH_TIMEOUT,
+ remote_auth_timeout_cb,
+ client);
+ return 0;
+ }
+#endif
+
+ rc = pcmk__read_remote_message(client->remote, timeout);
+
+ /* must pass auth before we will process anything else */
+ if (!pcmk_is_set(client->flags, pcmk__client_authenticated)) {
+ xmlNode *reg;
+ const char *user = NULL;
+
+ command = pcmk__remote_message_xml(client->remote);
+ if (cib_remote_auth(command) == FALSE) {
+ free_xml(command);
+ return -1;
+ }
+
+ crm_notice("Remote CIB client connection accepted");
+ pcmk__set_client_flags(client, pcmk__client_authenticated);
+ g_source_remove(client->remote->auth_timeout);
+ client->remote->auth_timeout = 0;
+ client->name = crm_element_value_copy(command, "name");
+
+ user = crm_element_value(command, "user");
+ if (user) {
+ client->user = strdup(user);
+ }
+
+ /* send ACK */
+ reg = create_xml_node(NULL, "cib_result");
+ crm_xml_add(reg, F_CIB_OPERATION, CRM_OP_REGISTER);
+ crm_xml_add(reg, F_CIB_CLIENTID, client->id);
+ pcmk__remote_send_xml(client->remote, reg);
+ free_xml(reg);
+ free_xml(command);
+ }
+
+ command = pcmk__remote_message_xml(client->remote);
+ while (command) {
+ crm_trace("Remote client message received");
+ cib_handle_remote_msg(client, command);
+ free_xml(command);
+ command = pcmk__remote_message_xml(client->remote);
+ }
+
+ if (rc == ENOTCONN) {
+ crm_trace("Remote CIB client disconnected while reading from it");
+ return -1;
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_PAM
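+/*!
+ * \internal
+ * \brief PAM conversation function that supplies the client's password
+ *
+ * \param[in] num_msg Number of PAM messages (only one is supported)
+ * \param[in] msg Array of PAM messages
+ * \param[out] response Where to store the allocated response
+ * \param[in] data Copy of the password to hand to PAM
+ *
+ * \return PAM_SUCCESS on success, otherwise PAM_CONV_ERR
+ */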
+static int
+construct_pam_passwd(int num_msg, const struct pam_message **msg,
+ struct pam_response **response, void *data)
+{
+ int count = 0;
+ struct pam_response *reply;
+ char *string = (char *)data;
+
+ CRM_CHECK(data, return PAM_CONV_ERR);
+ CRM_CHECK(num_msg == 1, return PAM_CONV_ERR); /* We only want to handle one message */
+
+ reply = calloc(1, sizeof(struct pam_response));
+ CRM_ASSERT(reply != NULL);
+
+ for (count = 0; count < num_msg; ++count) {
+ switch (msg[count]->msg_style) {
+ case PAM_TEXT_INFO:
+ crm_info("PAM: %s", msg[count]->msg);
+ break;
+ case PAM_PROMPT_ECHO_OFF:
+ case PAM_PROMPT_ECHO_ON:
+ reply[count].resp_retcode = 0;
+ reply[count].resp = string; /* We already made a copy */
+ break;
+ case PAM_ERROR_MSG:
+ /* In theory we'd want to print this, but then
+ * we see the password prompt in the logs
+ */
+ /* crm_err("PAM error: %s", msg[count]->msg); */
+ break;
+ default:
+ crm_err("Unhandled conversation type: %d", msg[count]->msg_style);
+ goto bail;
+ }
+ }
+
+ *response = reply;
+ reply = NULL;
+
+ return PAM_SUCCESS;
+
+ bail:
+ for (count = 0; count < num_msg; ++count) {
+ if (reply[count].resp != NULL) {
+ switch (msg[count]->msg_style) {
+ case PAM_PROMPT_ECHO_ON:
+ case PAM_PROMPT_ECHO_OFF:
+ /* Erase the data - it contained a password. Use a temporary
+ * pointer so free() still gets the original allocation. */
+ {
+ char *tmp = reply[count].resp;
+
+ while (*tmp) {
+ *tmp++ = '\0';
+ }
+ }
+ free(reply[count].resp);
+ break;
+ }
+ reply[count].resp = NULL;
+ }
+ }
+ free(reply);
+ reply = NULL;
+
+ return PAM_CONV_ERR;
+}
+#endif
+
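+/*!
+ * \internal
+ * \brief Check a user's password via PAM
+ *
+ * \param[in] user User name to authenticate
+ * \param[in] passwd Password to check
+ *
+ * \return TRUE if authentication succeeds, otherwise FALSE (without PAM
+ * support, always TRUE)
+ */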
+int
+authenticate_user(const char *user, const char *passwd)
+{
+#ifndef HAVE_PAM
+ gboolean pass = TRUE;
+#else
+ int rc = 0;
+ gboolean pass = FALSE;
+ const void *p_user = NULL;
+
+ struct pam_conv p_conv;
+ struct pam_handle *pam_h = NULL;
+ static const char *pam_name = NULL;
+
+ if (pam_name == NULL) {
+ pam_name = getenv("CIB_pam_service");
+ }
+ if (pam_name == NULL) {
+ pam_name = "login";
+ }
+
+ p_conv.conv = construct_pam_passwd;
+ p_conv.appdata_ptr = strdup(passwd);
+
+ rc = pam_start(pam_name, user, &p_conv, &pam_h);
+ if (rc != PAM_SUCCESS) {
+ crm_err("Could not initialize PAM: %s (%d)", pam_strerror(pam_h, rc), rc);
+ goto bail;
+ }
+
+ rc = pam_authenticate(pam_h, 0);
+ if (rc != PAM_SUCCESS) {
+ crm_err("Authentication failed for %s: %s (%d)", user, pam_strerror(pam_h, rc), rc);
+ goto bail;
+ }
+
+ /* Make sure we authenticated the user we wanted to authenticate.
+ * Since we also run as non-root, it might be worth pre-checking
+ * that the user has the same EUID as us, since that is the only
+ * user we can authenticate.
+ */
+ rc = pam_get_item(pam_h, PAM_USER, &p_user);
+ if (rc != PAM_SUCCESS) {
+ crm_err("Internal PAM error: %s (%d)", pam_strerror(pam_h, rc), rc);
+ goto bail;
+
+ } else if (p_user == NULL) {
+ crm_err("Unknown user authenticated.");
+ goto bail;
+
+ } else if (!pcmk__str_eq(p_user, user, pcmk__str_casei)) {
+ crm_err("User mismatch: %s vs. %s.", (const char *)p_user, (const char *)user);
+ goto bail;
+ }
+
+ rc = pam_acct_mgmt(pam_h, 0);
+ if (rc != PAM_SUCCESS) {
+ crm_err("Access denied: %s (%d)", pam_strerror(pam_h, rc), rc);
+ goto bail;
+ }
+ pass = TRUE;
+
+ bail:
+ pam_end(pam_h, rc);
+#endif
+ return pass;
+}
diff --git a/daemons/based/cib.pam b/daemons/based/cib.pam
new file mode 100644
index 0000000..5d0f655
--- /dev/null
+++ b/daemons/based/cib.pam
@@ -0,0 +1,6 @@
+# login: auth account password session
+# may require permission to read /etc/shadow
+auth include common-auth
+account include common-account
+password include common-password
+session include common-session
diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c
new file mode 100644
index 0000000..129997e
--- /dev/null
+++ b/daemons/based/pacemaker-based.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pwd.h>
+#include <grp.h>
+#include <bzlib.h>
+#include <sys/types.h>
+
+#include <libxml/parser.h>
+
+#include <crm/crm.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-based.h>
+
+#define SUMMARY "daemon for managing the configuration of a Pacemaker cluster"
+
+extern int init_remote_listener(int port, gboolean encrypted);
+gboolean cib_shutdown_flag = FALSE;
+int cib_status = pcmk_ok;
+
+crm_cluster_t *crm_cluster = NULL;
+
+GMainLoop *mainloop = NULL;
+gchar *cib_root = NULL;
+static gboolean preserve_status = FALSE;
+
+gboolean cib_writes_enabled = TRUE;
+
+int remote_fd = 0;
+int remote_tls_fd = 0;
+
+GHashTable *config_hash = NULL;
+GHashTable *local_notify_queue = NULL;
+
+pcmk__output_t *logger_out = NULL;
+
+static void cib_init(void);
+void cib_shutdown(int nsig);
+static bool startCib(const char *filename);
+extern int write_cib_contents(gpointer p);
+
+static crm_exit_t exit_code = CRM_EX_OK;
+
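+// Signal handler that (re-)enables CIB disk writes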
+static void
+cib_enable_writes(int nsig)
+{
+ crm_info("(Re)enabling disk writes");
+ cib_writes_enabled = TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Set up options, users, and groups for stand-alone mode
+ *
+ * \param[out] error GLib error object
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+setup_stand_alone(GError **error)
+{
+ int rc = 0;
+ struct passwd *pwentry = NULL;
+
+ preserve_status = TRUE;
+ cib_writes_enabled = FALSE;
+
+ errno = 0;
+ pwentry = getpwnam(CRM_DAEMON_USER);
+ if (pwentry == NULL) {
+ exit_code = CRM_EX_FATAL;
+ if (errno != 0) {
+ g_set_error(error, PCMK__EXITC_ERROR, exit_code,
+ "Error getting password DB entry for %s: %s",
+ CRM_DAEMON_USER, strerror(errno));
+ return errno;
+ }
+ g_set_error(error, PCMK__EXITC_ERROR, exit_code,
+ "Password DB entry for '%s' not found", CRM_DAEMON_USER);
+ return ENXIO;
+ }
+
+ rc = setgid(pwentry->pw_gid);
+ if (rc < 0) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(error, PCMK__EXITC_ERROR, exit_code,
+ "Could not set group to %d: %s",
+ pwentry->pw_gid, strerror(errno));
+ return errno;
+ }
+
+ rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid);
+ if (rc < 0) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(error, PCMK__EXITC_ERROR, exit_code,
+ "Could not setup groups for user %d: %s",
+ pwentry->pw_uid, strerror(errno));
+ return errno;
+ }
+
+ rc = setuid(pwentry->pw_uid);
+ if (rc < 0) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(error, PCMK__EXITC_ERROR, exit_code,
+ "Could not set user to %d: %s",
+ pwentry->pw_uid, strerror(errno));
+ return errno;
+ }
+ return pcmk_rc_ok;
+}
+
+static GOptionEntry entries[] = {
+ { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
+ "(Advanced use only) Run in stand-alone mode", NULL },
+
+ { "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
+ &cib_writes_enabled,
+ "(Advanced use only) Enable disk writes (enabled by default unless in "
+ "stand-alone mode)", NULL },
+
+ { "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root,
+ "(Advanced use only) Directory where the CIB XML file should be located "
+ "(default: " CRM_CONFIG_DIR ")", NULL },
+
+ { NULL }
+};
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_ipc_t *old_instance = NULL;
+
+ pcmk__output_t *out = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "r");
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ rc = pcmk__log_output_new(&logger_out);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format log: %s", pcmk_rc_str(rc));
+ goto done;
+ }
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+
+ mainloop_add_signal(SIGTERM, cib_shutdown);
+ mainloop_add_signal(SIGPIPE, cib_enable_writes);
+
+ cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL);
+
+ if ((g_strv_length(processed_args) >= 2)
+ && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
+ cib_metadata();
+ goto done;
+ }
+
+ pcmk__cli_init_logging("pacemaker-based", args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ crm_notice("Starting Pacemaker CIB manager");
+
+ old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0);
+ if (old_instance == NULL) {
+ /* crm_ipc_new() will have already logged an error message with
+ * crm_err()
+ */
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("pacemaker-based is already active, aborting startup");
+ goto done;
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
+ if (stand_alone) {
+ rc = setup_stand_alone(&error);
+ if (rc != pcmk_rc_ok) {
+ goto done;
+ }
+ }
+
+ if (cib_root == NULL) {
+ cib_root = g_strdup(CRM_CONFIG_DIR);
+ } else {
+ crm_notice("Using custom config location: %s", cib_root);
+ }
+
+ if (!pcmk__daemon_can_write(cib_root, NULL)) {
+ exit_code = CRM_EX_FATAL;
+ crm_err("Terminating due to bad permissions on %s", cib_root);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Bad permissions on %s (see logs for details)", cib_root);
+ goto done;
+ }
+
+ crm_peer_init();
+
+ // Read initial CIB, connect to cluster, and start IPC servers
+ cib_init();
+
+ // Run the main loop
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker CIB manager successfully started and accepting connections");
+ g_main_loop_run(mainloop);
+
+ /* If main loop returned, clean up and exit. We disconnect in case
+ * terminate_cib() was called with fast=-1.
+ */
+ crm_cluster_disconnect(crm_cluster);
+ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ crm_peer_destroy();
+
+ if (local_notify_queue != NULL) {
+ g_hash_table_destroy(local_notify_queue);
+ }
+
+ if (config_hash != NULL) {
+ g_hash_table_destroy(config_hash);
+ }
+ pcmk__client_cleanup();
+ pcmk_cluster_free(crm_cluster);
+ g_free(cib_root);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
+
+#if SUPPORT_COROSYNC
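+/*!
+ * \internal
+ * \brief Deliver a CPG message from a cluster peer to the CIB callbacks
+ */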
+static void
+cib_cs_dispatch(cpg_handle_t handle,
+ const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ xmlNode *xml = NULL;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+ if (data == NULL) {
+ return;
+ }
+ if (kind == crm_class_cluster) {
+ xml = string2xml(data);
+ if (xml == NULL) {
+ crm_err("Invalid XML: '%.120s'", data);
+ free(data);
+ return;
+ }
+ crm_xml_add(xml, F_ORIG, from);
+ /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
+ cib_peer_callback(xml, NULL);
+ }
+
+ free_xml(xml);
+ free(data);
+}
+
+static void
+cib_cs_destroy(gpointer user_data)
+{
+ if (cib_shutdown_flag) {
+ crm_info("Corosync disconnection complete");
+ } else {
+ crm_crit("Lost connection to cluster layer, shutting down");
+ terminate_cib(__func__, CRM_EX_DISCONNECT);
+ }
+}
+#endif
+
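+/*!
+ * \internal
+ * \brief React to a peer status change (disable legacy mode when possible,
+ * and finish shutdown once no peers or clients remain)
+ */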
+static void
+cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+{
+ switch (type) {
+ case crm_status_processes:
+ if (cib_legacy_mode()
+ && !pcmk_is_set(node->processes, crm_get_cluster_proc())) {
+
+ uint32_t old = data? *(const uint32_t *)data : 0;
+
+ if ((node->processes ^ old) & crm_proc_cpg) {
+ crm_info("Attempting to disable legacy mode after %s left the cluster",
+ node->uname);
+ legacy_mode = FALSE;
+ }
+ }
+ break;
+
+ case crm_status_uname:
+ case crm_status_nstate:
+ if (cib_shutdown_flag && (crm_active_peers() < 2)
+ && (pcmk__ipc_client_count() == 0)) {
+
+ crm_info("No more peers");
+ terminate_cib(__func__, -1);
+ }
+ break;
+ }
+}
+
+static void
+cib_init(void)
+{
+ crm_cluster = pcmk_cluster_new();
+
+#if SUPPORT_COROSYNC
+ if (is_corosync_cluster()) {
+ crm_cluster->destroy = cib_cs_destroy;
+ crm_cluster->cpg.cpg_deliver_fn = cib_cs_dispatch;
+ crm_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+ }
+#endif // SUPPORT_COROSYNC
+
+ config_hash = pcmk__strkey_table(free, free);
+
+ if (startCib("cib.xml") == FALSE) {
+ crm_crit("Cannot start CIB... terminating");
+ crm_exit(CRM_EX_NOINPUT);
+ }
+
+ if (!stand_alone) {
+ crm_set_status_callback(&cib_peer_update_callback);
+
+ if (!crm_cluster_connect(crm_cluster)) {
+ crm_crit("Cannot sign in to the cluster... terminating");
+ crm_exit(CRM_EX_FATAL);
+ }
+ }
+
+ pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks,
+ &ipc_rw_callbacks);
+
+ if (stand_alone) {
+ based_is_primary = true;
+ }
+}
+
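+/*!
+ * \internal
+ * \brief Read and activate the stored CIB, and start any remote listeners
+ *
+ * \param[in] filename Base name of the CIB file within cib_root
+ *
+ * \return true if the CIB was activated successfully, otherwise false
+ */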
+static bool
+startCib(const char *filename)
+{
+ gboolean active = FALSE;
+ xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status);
+
+ if (activateCibXml(cib, TRUE, "start") == 0) {
+ int port = 0;
+
+ active = TRUE;
+
+ cib_read_config(config_hash, cib);
+
+ pcmk__scan_port(crm_element_value(cib, "remote-tls-port"), &port);
+ if (port >= 0) {
+ remote_tls_fd = init_remote_listener(port, TRUE);
+ }
+
+ pcmk__scan_port(crm_element_value(cib, "remote-clear-port"), &port);
+ if (port >= 0) {
+ remote_fd = init_remote_listener(port, FALSE);
+ }
+ }
+ return active;
+}
diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h
new file mode 100644
index 0000000..05e49b3
--- /dev/null
+++ b/daemons/based/pacemaker-based.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PACEMAKER_BASED__H
+# define PACEMAKER_BASED__H
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <glib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/cib/internal.h>
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+# include <gnutls/gnutls.h>
+#endif
+
+// CIB-specific client flags
+enum cib_client_flags {
+ // Notifications
+ cib_notify_pre = (UINT64_C(1) << 0),
+ cib_notify_post = (UINT64_C(1) << 1),
+ cib_notify_replace = (UINT64_C(1) << 2),
+ cib_notify_confirm = (UINT64_C(1) << 3),
+ cib_notify_diff = (UINT64_C(1) << 4),
+
+ // Whether client is another cluster daemon
+ cib_is_daemon = (UINT64_C(1) << 12),
+};
+
+typedef struct cib_operation_s {
+ const char *operation;
+ gboolean modifies_cib;
+ gboolean needs_privileges;
+ int (*prepare) (xmlNode *, xmlNode **, const char **);
+ int (*cleanup) (int, xmlNode **, xmlNode **);
+ int (*fn) (const char *, int, const char *, xmlNode *,
+ xmlNode *, xmlNode *, xmlNode **, xmlNode **);
+} cib_operation_t;
+
+extern bool based_is_primary;
+extern GHashTable *config_hash;
+extern xmlNode *the_cib;
+extern crm_trigger_t *cib_writer;
+extern gboolean cib_writes_enabled;
+
+extern GMainLoop *mainloop;
+extern crm_cluster_t *crm_cluster;
+extern GHashTable *local_notify_queue;
+extern gboolean legacy_mode;
+extern gboolean stand_alone;
+extern gboolean cib_shutdown_flag;
+extern gchar *cib_root;
+extern int cib_status;
+extern pcmk__output_t *logger_out;
+
+extern struct qb_ipcs_service_handlers ipc_ro_callbacks;
+extern struct qb_ipcs_service_handlers ipc_rw_callbacks;
+extern qb_ipcs_service_t *ipcs_ro;
+extern qb_ipcs_service_t *ipcs_rw;
+extern qb_ipcs_service_t *ipcs_shm;
+
+void cib_peer_callback(xmlNode *msg, void *private_data);
+void cib_common_callback_worker(uint32_t id, uint32_t flags,
+ xmlNode *op_request, pcmk__client_t *cib_client,
+ gboolean privileged);
+void cib_shutdown(int nsig);
+void terminate_cib(const char *caller, int fast);
+gboolean cib_legacy_mode(void);
+
+gboolean uninitializeCib(void);
+xmlNode *readCibXmlFile(const char *dir, const char *file,
+ gboolean discard_status);
+int activateCibXml(xmlNode *doc, gboolean to_disk, const char *op);
+
+int cib_process_shutdown_req(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input,
+ xmlNode *existing_cib, xmlNode **result_cib,
+ xmlNode **answer);
+int cib_process_default(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_ping(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_readwrite(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_replace_svr(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_server_process_diff(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_sync(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_sync_one(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_delete_absolute(const char *op, int options,
+ const char *section, xmlNode *req,
+ xmlNode *input, xmlNode *existing_cib,
+ xmlNode **result_cib, xmlNode **answer);
+int cib_process_upgrade_server(const char *op, int options, const char *section,
+ xmlNode *req, xmlNode *input,
+ xmlNode *existing_cib, xmlNode **result_cib,
+ xmlNode **answer);
+void send_sync_request(const char *host);
+int sync_our_cib(xmlNode *request, gboolean all);
+
+xmlNode *cib_msg_copy(xmlNode *msg, gboolean with_data);
+int cib_get_operation_id(const char *op, int *operation);
+cib_op_t *cib_op_func(int call_type);
+gboolean cib_op_modifies(int call_type);
+int cib_op_prepare(int call_type, xmlNode *request, xmlNode **input,
+ const char **section);
+int cib_op_cleanup(int call_type, int options, xmlNode **input,
+ xmlNode **output);
+int cib_op_can_run(int call_type, int call_options, bool privileged);
+void cib_diff_notify(const char *op, int result, const char *call_id,
+ const char *client_id, const char *client_name,
+ const char *origin, xmlNode *update, xmlNode *diff);
+void cib_replace_notify(const char *op, int result, const char *call_id,
+ const char *client_id, const char *client_name,
+ const char *origin, xmlNode *update, xmlNode *diff,
+ uint32_t change_section);
+
+static inline const char *
+cib_config_lookup(const char *opt)
+{
+ return g_hash_table_lookup(config_hash, opt);
+}
+
+#endif // PACEMAKER_BASED__H
diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
new file mode 100644
index 0000000..08be1ff
--- /dev/null
+++ b/daemons/controld/Makefile.am
@@ -0,0 +1,87 @@
+#
+# Copyright 2018-2023 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-controld
+
+noinst_HEADERS = controld_alerts.h \
+ controld_callbacks.h \
+ controld_cib.h \
+ controld_fencing.h \
+ controld_fsa.h \
+ controld_globals.h \
+ controld_lrm.h \
+ controld_membership.h \
+ controld_messages.h \
+ controld_metadata.h \
+ controld_throttle.h \
+ controld_timers.h \
+ controld_transition.h \
+ controld_utils.h \
+ pacemaker-controld.h
+
+pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \
+ $(top_builddir)/lib/pacemaker/libpacemaker.la \
+ $(top_builddir)/lib/pengine/libpe_rules.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(CLUSTERLIBS)
+
+pacemaker_controld_SOURCES = pacemaker-controld.c \
+ controld_alerts.c \
+ controld_attrd.c \
+ controld_callbacks.c \
+ controld_cib.c \
+ controld_control.c \
+ controld_corosync.c \
+ controld_election.c \
+ controld_execd.c \
+ controld_execd_state.c \
+ controld_fencing.c \
+ controld_fsa.c \
+ controld_join_client.c \
+ controld_join_dc.c \
+ controld_matrix.c \
+ controld_membership.c \
+ controld_messages.c \
+ controld_metadata.c \
+ controld_remote_ra.c \
+ controld_schedulerd.c \
+ controld_te_actions.c \
+ controld_te_callbacks.c \
+ controld_te_events.c \
+ controld_te_utils.c \
+ controld_throttle.c \
+ controld_timers.c \
+ controld_transition.c \
+ controld_utils.c
+
+if BUILD_XML_HELP
+man7_MANS = pacemaker-controld.7
+endif
+
+CLEANFILES = $(man7_MANS)
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd && $(LN_S) pacemaker-controld crmd
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd
+endif
diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
new file mode 100644
index 0000000..27a5ce2
--- /dev/null
+++ b/daemons/controld/controld_alerts.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <libxml/tree.h>
+
+#include <crm/fencing/internal.h>
+#include <crm/lrmd.h>
+#include <crm/lrmd_internal.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/pengine/status.h>
+#include <crm/stonith-ng.h>
+
+#include <pacemaker-controld.h>
+
+static GList *crmd_alert_list = NULL;
+
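+// Replace the cached alert list with one unpacked from the CIB alerts section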
+void
+crmd_unpack_alerts(xmlNode *alerts)
+{
+ pe_free_alert_list(crmd_alert_list);
+ crmd_alert_list = pe_unpack_alerts(alerts);
+}
+
+void
+crmd_alert_node_event(crm_node_t *node)
+{
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+ node->uname, node->id, node->state);
+}
+
+void
+crmd_alert_fencing_op(stonith_event_t * e)
+{
+ char *desc;
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ desc = stonith__event_description(e);
+ lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+ e->target, e->operation, desc, e->result);
+ free(desc);
+}
+
+void
+crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
+{
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
+ op);
+}
diff --git a/daemons/controld/controld_alerts.h b/daemons/controld/controld_alerts.h
new file mode 100644
index 0000000..ec5852a
--- /dev/null
+++ b/daemons/controld/controld_alerts.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2015-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_ALERTS__H
+# define CONTROLD_ALERTS__H
+
+# include <crm/crm.h>
+# include <crm/cluster.h>
+# include <crm/stonith-ng.h>
+
+void crmd_unpack_alerts(xmlNode *alerts);
+void crmd_alert_node_event(crm_node_t *node);
+void crmd_alert_fencing_op(stonith_event_t *e);
+void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op);
+
+#endif
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
new file mode 100644
index 0000000..923abb9
--- /dev/null
+++ b/daemons/controld/controld_attrd.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2006-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/common/attrd_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_attrd_internal.h>
+#include <crm/msg_xml.h>
+
+#include <pacemaker-controld.h>
+
+static pcmk_ipc_api_t *attrd_api = NULL;
+
+void
+controld_close_attrd_ipc(void)
+{
+ if (attrd_api != NULL) {
+ crm_trace("Closing connection to pacemaker-attrd");
+ pcmk_disconnect_ipc(attrd_api);
+ pcmk_free_ipc_api(attrd_api);
+ attrd_api = NULL;
+ }
+}
+
+static inline const char *
+node_type(bool is_remote)
+{
+ return is_remote? "Pacemaker Remote" : "cluster";
+}
+
+static inline const char *
+when(void)
+{
+ return pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN)? " at shutdown" : "";
+}
+
+static void
+handle_attr_error(void)
+{
+ if (AM_I_DC) {
+ /* We are unable to provide accurate information to the
+ * scheduler, so allow another node to take over DC.
+ * @TODO Should we do this unconditionally on any failure?
+ */
+ crmd_exit(CRM_EX_FATAL);
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ // Fast-track shutdown since unable to request via attribute
+ register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+}
+
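+/*!
+ * \internal
+ * \brief Ask the attribute manager to update a node attribute
+ *
+ * \param[in] host Name of the node whose attribute should be updated
+ * \param[in] name Attribute name
+ * \param[in] value New attribute value
+ * \param[in] user_name ACL user to send the request as (may be NULL)
+ * \param[in] is_remote_node Whether host is a Pacemaker Remote node
+ */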
+void
+update_attrd(const char *host, const char *name, const char *value,
+ const char *user_name, gboolean is_remote_node)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ uint32_t attrd_opts = pcmk__node_attr_value;
+
+ if (is_remote_node) {
+ pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
+ }
+ rc = pcmk__attrd_api_update(attrd_api, host, name, value,
+ NULL, NULL, user_name, attrd_opts);
+ }
+ if (rc != pcmk_rc_ok) {
+ do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
+ "Could not update attribute %s=%s for %s node %s%s: %s "
+ CRM_XS " rc=%d", name, value, node_type(is_remote_node),
+ host, when(), pcmk_rc_str(rc), rc);
+ handle_attr_error();
+ }
+}
+
+void
+update_attrd_list(GList *attrs, uint32_t opts)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL,
+ opts | pcmk__node_attr_value);
+ }
+ if (rc != pcmk_rc_ok) {
+ do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
+ "Could not update multiple node attributes: %s "
+ CRM_XS " rc=%d", pcmk_rc_str(rc), rc);
+ handle_attr_error();
+ }
+}
+
+void
+update_attrd_remote_node_removed(const char *host, const char *user_name)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
+ host);
+ rc = pcmk__attrd_api_purge(attrd_api, host);
+ }
+ if (rc != pcmk_rc_ok) {
+ crm_err("Could not purge Pacemaker Remote node %s "
+ "in attribute manager%s: %s " CRM_XS " rc=%d",
+ host, when(), pcmk_rc_str(rc), rc);
+ }
+}
+
+void
+update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
+ const char *interval_spec, gboolean is_remote_node)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ const char *op_desc = pcmk__s(op, "operations");
+ const char *interval_desc = "all";
+ uint32_t attrd_opts = pcmk__node_attr_none;
+
+ if (op != NULL) {
+ interval_desc = pcmk__s(interval_spec, "nonrecurring");
+ }
+ if (is_remote_node) {
+ pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
+ }
+ crm_info("Asking attribute manager to clear failure of %s %s for %s "
+ "on %s node %s", interval_desc, op_desc, rsc,
+ node_type(is_remote_node), host);
+ rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op,
+ interval_spec, NULL, attrd_opts);
+ }
+ if (rc != pcmk_rc_ok) {
+ crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
+ CRM_XS " rc=%d", pcmk__s(rsc, "all resources"),
+ node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc);
+ }
+}
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
new file mode 100644
index 0000000..d578adc
--- /dev/null
+++ b/daemons/controld/controld_callbacks.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+#include <crm/cib.h>
+
+#include <pacemaker-controld.h>
+
+/* From join_dc... */
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
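+/*!
+ * \internal
+ * \brief Route an inbound cluster message, triggering an election if
+ * another DC is detected
+ *
+ * \param[in,out] msg Message to route
+ */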
+void
+crmd_ha_msg_filter(xmlNode * msg)
+{
+ if (AM_I_DC) {
+ const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+ if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
+ const char *from = crm_element_value(msg, F_ORIG);
+
+ if (!pcmk__str_eq(from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ int level = LOG_INFO;
+ const char *op = crm_element_value(msg, F_CRM_TASK);
+
+ /* make sure the election happens NOW */
+ if (controld_globals.fsa_state != S_ELECTION) {
+ ha_msg_input_t new_input;
+
+ level = LOG_WARNING;
+ new_input.msg = msg;
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
+ __func__);
+ }
+
+ do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
+ goto done;
+ }
+ }
+
+ } else {
+ const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+
+ if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
+ return;
+ }
+ }
+
+ /* crm_log_xml_trace(msg, "HA[inbound]"); */
+ route_message(C_HA_MESSAGE, msg);
+
+ done:
+ controld_trigger_fsa();
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is online
+ *
+ * \param[in] node Node to check
+ *
+ * \retval -1 if completely dead
+ * \retval 0 if partially alive
+ * \retval 1 if completely alive
+ */
+static int
+node_alive(const crm_node_t *node)
+{
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ // Pacemaker Remote nodes can't be partially alive
+ return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1 : -1;
+
+ } else if (crm_is_peer_active(node)) {
+ // Completely up cluster node: both cluster member and peer
+ return 1;
+
+ } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
+ && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
+ // Completely down cluster node: neither cluster member nor peer
+ return -1;
+ }
+
+ // Partially up cluster node: only cluster member or only peer
+ return 0;
+}
+
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
+
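+/*!
+ * \internal
+ * \brief Update controller state based on a peer's status change
+ *
+ * \param[in] type Type of status change
+ * \param[in] node Node whose status changed
+ * \param[in] data Previous value of the changed field (depends on type)
+ */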
+void
+peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+{
+ uint32_t old = 0;
+ bool appeared = FALSE;
+ bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
+
+ /* The controller waits to receive some information from the membership
+ * layer before declaring itself operational. If this is being called for a
+ * cluster node, indicate that we have it.
+ */
+ if (!is_remote) {
+ controld_set_fsa_input_flags(R_PEER_DATA);
+ }
+
+ if (type == crm_status_processes
+ && pcmk_is_set(node->processes, crm_get_cluster_proc())
+ && !AM_I_DC
+ && !is_remote) {
+ /*
+ * This is a hack until we can send to a nodeid and/or fix node name
+ * lookups. These messages are ignored in crmd_ha_msg_filter().
+ */
+ xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ crm_debug("Sending hello to node %u so that it learns our node name", node->id);
+ send_cluster_message(node, crm_msg_crmd, query, FALSE);
+
+ free_xml(query);
+ }
+
+ if (node->uname == NULL) {
+ return;
+ }
+
+ switch (type) {
+ case crm_status_uname:
+ /* If we've never seen the node, then it also won't be in the status section */
+ crm_info("%s node %s is now %s",
+ (is_remote? "Remote" : "Cluster"),
+ node->uname, state_text(node->state));
+ return;
+
+ case crm_status_nstate:
+ /* This callback should not be called unless the state actually
+ * changed, but here's a failsafe just in case.
+ */
+ CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
+ return);
+
+ crm_info("%s node %s is now %s (was %s)",
+ (is_remote? "Remote" : "Cluster"),
+ node->uname, state_text(node->state), state_text(data));
+
+ if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
+ appeared = TRUE;
+ if (!is_remote) {
+ remove_stonith_cleanup(node->uname);
+ }
+ } else {
+ controld_remove_failed_sync_node(node->uname);
+ controld_remove_voter(node->uname);
+ }
+
+ crmd_alert_node_event(node);
+ break;
+
+ case crm_status_processes:
+ CRM_CHECK(data != NULL, return);
+ old = *(const uint32_t *)data;
+ appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
+
+ {
+ const char *dc_s = controld_globals.dc_name;
+
+ if ((dc_s == NULL) && AM_I_DC) {
+ dc_s = "true";
+ }
+
+ crm_info("Node %s is %s a peer " CRM_XS
+ " DC=%s old=%#07x new=%#07x",
+ node->uname, (appeared? "now" : "no longer"),
+ pcmk__s(dc_s, "<none>"), old, node->processes);
+ }
+
+ if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
+ /* Peer status did not change. This should not be possible,
+ * since we don't track process flags other than peer status.
+ */
+ crm_trace("Process flag %#7x did not change from %#7x to %#7x",
+ crm_get_cluster_proc(), old, node->processes);
+ return;
+
+ }
+
+ if (!appeared) {
+ node->peer_lost = time(NULL);
+ controld_remove_failed_sync_node(node->uname);
+ controld_remove_voter(node->uname);
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_CIB_CONNECTED)) {
+ crm_trace("Ignoring peer status change because not connected to CIB");
+ return;
+
+ } else if (controld_globals.fsa_state == S_STOPPING) {
+ crm_trace("Ignoring peer status change because stopping");
+ return;
+ }
+
+ if (!appeared
+ && pcmk__str_eq(node->uname, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ /* Did we get evicted? */
+ crm_notice("Our peer connection failed");
+ register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
+
+ } else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
+ pcmk__str_casei)
+ && !crm_is_peer_active(node)) {
+ /* Did the DC leave us? */
+ crm_notice("Our peer on the DC (%s) is dead",
+ controld_globals.dc_name);
+ register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
+
+ /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
+ * want to fence it. Newer DCs will send their shutdown request
+ * to all peers, who will update the DC's expected state to
+ * down, thus avoiding fencing. We can safely erase the DC's
+ * transient attributes when it leaves in that case. However,
+ * the only way to avoid fencing older DCs is to leave the
+ * transient attributes intact until it rejoins.
+ */
+ if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
+ controld_delete_node_state(node->uname,
+ controld_section_attrs,
+ cib_scope_local);
+ }
+
+ } else if (AM_I_DC
+ || pcmk_is_set(controld_globals.flags, controld_dc_left)
+ || (controld_globals.dc_name == NULL)) {
+ /* This only needs to be done once, so normally the DC should do
+ * it. However if there is no DC, every node must do it, since
+ * there is no other way to ensure that at least one node does it.
+ */
+ if (appeared) {
+ te_trigger_stonith_history_sync(FALSE);
+ } else {
+ controld_delete_node_state(node->uname,
+ controld_section_attrs,
+ cib_scope_local);
+ }
+ }
+ break;
+ }
+
+ if (AM_I_DC) {
+ xmlNode *update = NULL;
+ int flags = node_update_peer;
+ int alive = node_alive(node);
+ pcmk__graph_action_t *down = match_down_event(node->uuid);
+
+ crm_trace("Alive=%d, appeared=%d, down=%d",
+ alive, appeared, (down? down->id : -1));
+
+ if (appeared && (alive > 0) && !is_remote) {
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+ }
+
+ if (down) {
+ const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
+
+ if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+
+ /* tengine_stonith_callback() confirms fence actions */
+ crm_trace("Updating CIB %s fencer reported fencing of %s complete",
+ (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
+
+ } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
+
+ // Shutdown actions are immediately confirmed (i.e. no_wait)
+ if (!is_remote) {
+ flags |= node_update_join | node_update_expected;
+ crmd_peer_down(node, FALSE);
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ if (alive >= 0) {
+ crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
+ task, node->uname, down->id);
+ } else {
+ crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
+ task, node->uname, down->id);
+ pcmk__update_graph(controld_globals.transition_graph, down);
+ trigger_graph();
+ }
+
+ } else {
+ crm_trace("Node %s is %s, was expected to %s (op %d)",
+ node->uname,
+ ((alive > 0)? "alive" :
+ ((alive < 0)? "dead" : "partially alive")),
+ task, down->id);
+ }
+
+ } else if (appeared == FALSE) {
+ if ((controld_globals.transition_graph == NULL)
+ || (controld_globals.transition_graph->id == -1)) {
+ crm_info("Stonith/shutdown of node %s is unknown to the "
+ "current DC", node->uname);
+ } else {
+ crm_warn("Stonith/shutdown of node %s was not expected",
+ node->uname);
+ }
+ if (!is_remote) {
+ crm_update_peer_join(__func__, node, crm_join_none);
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
+ NULL);
+ fail_incompletable_actions(controld_globals.transition_graph,
+ node->uuid);
+
+ } else {
+ crm_trace("Node %s came up, was not expected to be down",
+ node->uname);
+ }
+
+ if (is_remote) {
+ /* A pacemaker_remote node won't have its cluster status updated
+ * in the CIB by membership-layer callbacks, so do it here.
+ */
+ flags |= node_update_cluster;
+
+ /* Trigger resource placement on newly integrated nodes */
+ if (appeared) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Pacemaker Remote node integrated", NULL);
+ }
+ }
+
+ /* Update the CIB node state */
+ update = create_node_state_update(node, flags, NULL, __func__);
+ if (update == NULL) {
+ crm_debug("Node state update not yet possible for %s", node->uname);
+ } else {
+ fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
+ }
+ free_xml(update);
+ }
+
+ controld_trigger_fsa();
+}
+
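+/*!
+ * \internal
+ * \brief Mainloop trigger callback that runs the controller FSA
+ *
+ * \param[in] user_data Ignored
+ *
+ * \return TRUE (to keep the trigger active)
+ */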
+gboolean
+crm_fsa_trigger(gpointer user_data)
+{
+ crm_trace("Invoked (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
+ s_crmd_fsa(C_FSA_INTERNAL);
+ crm_trace("Exited (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
+ return TRUE;
+}
diff --git a/daemons/controld/controld_callbacks.h b/daemons/controld/controld_callbacks.h
new file mode 100644
index 0000000..a69d515
--- /dev/null
+++ b/daemons/controld/controld_callbacks.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_CALLBACKS__H
+# define CONTROLD_CALLBACKS__H
+
+#include <crm/cluster.h>
+
+extern void crmd_ha_msg_filter(xmlNode * msg);
+
+extern gboolean crm_fsa_trigger(gpointer user_data);
+
+extern void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data);
+
+#endif
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
new file mode 100644
index 0000000..94b99dd
--- /dev/null
+++ b/daemons/controld/controld_cib.c
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <unistd.h> /* sleep */
+
+#include <crm/common/alerts_internal.h>
+#include <crm/common/xml.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-controld.h>
+
+// Call ID of the most recent in-progress CIB resource update (or 0 if none)
+static int pending_rsc_update = 0;
+
+// Call IDs of requested CIB replacements that won't trigger a new election
+// (used as a set of gint values)
+static GHashTable *cib_replacements = NULL;
+
+/*!
+ * \internal
+ * \brief Store the call ID of a CIB replacement that the controller requested
+ *
+ * The \p do_cib_replaced() callback function will avoid triggering a new
+ * election when we're notified of one of these expected replacements.
+ *
+ * \param[in] call_id CIB call ID (or 0 for a synchronous call)
+ *
+ * \note This function should be called after making any asynchronous CIB
+ * request (or before making any synchronous CIB request) that may replace
+ * part of the nodes or status section. This may include CIB sync calls.
+ */
+void
+controld_record_cib_replace_call(int call_id)
+{
+ CRM_CHECK(call_id >= 0, return);
+
+ if (cib_replacements == NULL) {
+ cib_replacements = g_hash_table_new(NULL, NULL);
+ }
+
+ /* If the call ID is already present in the table, then it's old. We may not
+ * be removing them properly, and we could improperly ignore replacement
+ * notifications if cib_t:call_id wraps around.
+ */
+ CRM_LOG_ASSERT(g_hash_table_add(cib_replacements,
+ GINT_TO_POINTER((gint) call_id)));
+}
+
+/*!
+ * \internal
+ * \brief Remove the call ID of a CIB replacement from the replacements table
+ *
+ * \param[in] call_id CIB call ID (or 0 for a synchronous call)
+ *
+ * \return \p true if \p call_id was found in the table, or \p false otherwise
+ *
+ * \note CIB notifications run before CIB callbacks. If this function is called
+ * from within a callback, \p do_cib_replaced() will have removed
+ * \p call_id from the table first if relevant changes triggered a
+ * notification.
+ */
+bool
+controld_forget_cib_replace_call(int call_id)
+{
+ CRM_CHECK(call_id >= 0, return false);
+
+ if (cib_replacements == NULL) {
+ return false;
+ }
+ return g_hash_table_remove(cib_replacements,
+ GINT_TO_POINTER((gint) call_id));
+}
+
+/*!
+ * \internal
+ * \brief Empty the hash table containing call IDs of CIB replacement requests
+ */
+void
+controld_forget_all_cib_replace_calls(void)
+{
+ if (cib_replacements != NULL) {
+ g_hash_table_remove_all(cib_replacements);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Free the hash table containing call IDs of CIB replacement requests
+ */
+void
+controld_destroy_cib_replacements_table(void)
+{
+ if (cib_replacements != NULL) {
+ g_hash_table_destroy(cib_replacements);
+ cib_replacements = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Respond to a dropped CIB connection
+ *
+ * \param[in] user_data CIB connection that dropped
+ */
+static void
+handle_cib_disconnect(gpointer user_data)
+{
+ CRM_LOG_ASSERT(user_data == controld_globals.cib_conn);
+
+ controld_trigger_fsa();
+ controld_globals.cib_conn->state = cib_disconnected;
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
+ // @TODO This should trigger a reconnect, not a shutdown
+ crm_crit("Lost connection to the CIB manager, shutting down");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_clear_fsa_input_flags(R_CIB_CONNECTED);
+
+ } else { // Expected
+ crm_info("Connection to the CIB manager terminated");
+ }
+}
+
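+// Recheck our configuration if a CIB change may have affected alerts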
+static void
+do_cib_updated(const char *event, xmlNode * msg)
+{
+ if (pcmk__alert_in_patchset(msg, TRUE)) {
+ controld_trigger_config();
+ }
+}
+
+static void
+do_cib_replaced(const char *event, xmlNode * msg)
+{
+ int call_id = 0;
+ const char *client_id = crm_element_value(msg, F_CIB_CLIENTID);
+ uint32_t change_section = cib_change_section_nodes
+ |cib_change_section_status;
+ long long value = 0;
+
+ crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC));
+ if (!AM_I_DC) {
+ return;
+ }
+
+ if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0)
+ && pcmk__str_eq(client_id, controld_globals.cib_client_id,
+ pcmk__str_none)
+ && controld_forget_cib_replace_call(call_id)) {
+ // We requested this replace op. No need to restart the join.
+ return;
+ }
+
+ if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0)
+ || (value < 0) || (value > UINT32_MAX)) {
+
+ crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION);
+ } else {
+ change_section = (uint32_t) value;
+ }
+
+ if (pcmk_any_flags_set(change_section, cib_change_section_nodes
+ |cib_change_section_status)) {
+
+ /* start the join process again so we get everyone's LRM status */
+ populate_cib_nodes(node_update_quick|node_update_all, __func__);
+
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ }
+}
+
+void
+controld_disconnect_cib_manager(void)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ CRM_ASSERT(cib_conn != NULL);
+
+ crm_info("Disconnecting from the CIB manager");
+
+ controld_clear_fsa_input_flags(R_CIB_CONNECTED);
+
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY,
+ do_cib_replaced);
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ do_cib_updated);
+ cib_free_callbacks(cib_conn);
+
+ if (cib_conn->state != cib_disconnected) {
+ cib_conn->cmds->set_secondary(cib_conn,
+ cib_scope_local|cib_discard_reply);
+ cib_conn->cmds->signoff(cib_conn);
+ }
+
+ crm_notice("Disconnected from the CIB manager");
+}
+
+/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
+void
+do_cib_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ static int cib_retries = 0;
+
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect;
+ void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced;
+ void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated;
+
+ int rc = pcmk_ok;
+
+ CRM_ASSERT(cib_conn != NULL);
+
+ if (pcmk_is_set(action, A_CIB_STOP)) {
+ if ((cib_conn->state != cib_disconnected)
+ && (pending_rsc_update != 0)) {
+
+ crm_info("Waiting for resource update %d to complete",
+ pending_rsc_update);
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+ controld_disconnect_cib_manager();
+ }
+
+ if (!pcmk_is_set(action, A_CIB_START)) {
+ return;
+ }
+
+ if (cur_state == S_STOPPING) {
+ crm_err("Ignoring request to connect to the CIB manager after "
+ "shutdown");
+ return;
+ }
+
+ rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD,
+ cib_command_nonblocking);
+
+ if (rc != pcmk_ok) {
+ // A short wait that usually avoids stalling the FSA
+ sleep(1);
+ rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD,
+ cib_command_nonblocking);
+ }
+
+ if (rc != pcmk_ok) {
+ crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
+
+ } else if (cib_conn->cmds->set_connection_dnotify(cib_conn,
+ dnotify_fn) != pcmk_ok) {
+ crm_err("Could not set dnotify callback");
+
+ } else if (cib_conn->cmds->add_notify_callback(cib_conn,
+ T_CIB_REPLACE_NOTIFY,
+ replace_cb) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback (replace)");
+
+ } else if (cib_conn->cmds->add_notify_callback(cib_conn,
+ T_CIB_DIFF_NOTIFY,
+ update_cb) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback (update)");
+
+ } else {
+ controld_set_fsa_input_flags(R_CIB_CONNECTED);
+ cib_retries = 0;
+ cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id,
+ NULL);
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
+ cib_retries++;
+
+ if (cib_retries < 30) {
+ crm_warn("Couldn't complete CIB registration %d times... "
+ "pause and retry", cib_retries);
+ controld_start_wait_timer();
+ crmd_fsa_stall(FALSE);
+
+ } else {
+ crm_err("Could not complete CIB registration %d times... "
+ "hard error", cib_retries);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+ }
+}
+
+#define MIN_CIB_OP_TIMEOUT (30)
+
+/*!
+ * \internal
+ * \brief Get the timeout (in seconds) that should be used with CIB operations
+ *
+ * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout
+ * environment variable, or 10 seconds times one more than the number of
+ * nodes in the cluster.
+ */
+unsigned int
+cib_op_timeout(void)
+{
+ static int env_timeout = -1;
+ unsigned int calculated_timeout = 0;
+
+ if (env_timeout == -1) {
+ const char *env = getenv("PCMK_cib_timeout");
+
+ pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT);
+ crm_trace("Minimum CIB op timeout: %ds (environment: %s)",
+ env_timeout, (env? env : "none"));
+ }
+
+ calculated_timeout = 1 + crm_active_peers();
+ if (crm_remote_peer_cache) {
+ calculated_timeout += g_hash_table_size(crm_remote_peer_cache);
+ }
+ calculated_timeout *= 10;
+
+ calculated_timeout = QB_MAX(calculated_timeout, env_timeout);
+ crm_trace("Calculated timeout: %us", calculated_timeout);
+
+ if (controld_globals.cib_conn) {
+ controld_globals.cib_conn->call_timeout = calculated_timeout;
+ }
+ return calculated_timeout;
+}
+
+/*!
+ * \internal
+ * \brief Get CIB call options to use local scope if primary is unavailable
+ *
+ * \return CIB call options
+ */
+int
+crmd_cib_smart_opt(void)
+{
+ int call_opt = cib_none;
+
+ if ((controld_globals.fsa_state == S_ELECTION)
+ || (controld_globals.fsa_state == S_PENDING)) {
+ crm_info("Sending update to local CIB in state: %s",
+ fsa_state2string(controld_globals.fsa_state));
+ cib__set_call_options(call_opt, "update", cib_scope_local);
+ }
+ return call_opt;
+}
+
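+// Log the result of an asynchronous CIB deletion (user_data is a description)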
+static void
+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ char *desc = user_data;
+
+ if (rc == 0) {
+ crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
+ } else {
+ crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
+ desc, call_id, pcmk_strerror(rc), rc);
+ }
+}
+
+// Searches for various portions of node_state to delete
+
+// Match a particular node's node_state (takes node name 1x)
+#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
+
+// Node's lrm section (name 1x)
+#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
+
+/* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock
+ * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x)
+ */
+#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \
+ "|" XPATH_NODE_STATE \
+ "//" XML_LRM_TAG_RESOURCE \
+ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \
+ "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]"
+
+// Node's transient_attributes section (name 1x)
+#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
+
+// Everything under node_state (name 1x)
+#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
+
+/* Unlocked history + transient attributes
+ * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x,
+ * name 1x)
+ */
+#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
+
+/*!
+ * \internal
+ * \brief Delete subsection of a node's CIB node_state
+ *
+ * \param[in] uname Desired node
+ * \param[in] section Subsection of node_state to delete
+ * \param[in] options CIB call options to use
+ */
+void
+controld_delete_node_state(const char *uname, enum controld_section_e section,
+ int options)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ char *xpath = NULL;
+ char *desc = NULL;
+
+ // Shutdown locks that started before this time are expired
+ long long expire = (long long) time(NULL)
+ - controld_globals.shutdown_lock_limit;
+
+ CRM_CHECK(uname != NULL, return);
+ switch (section) {
+ case controld_section_lrm:
+ xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+ desc = crm_strdup_printf("resource history for node %s", uname);
+ break;
+ case controld_section_lrm_unlocked:
+ xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
+ uname, uname, expire);
+ desc = crm_strdup_printf("resource history (other than shutdown "
+ "locks) for node %s", uname);
+ break;
+ case controld_section_attrs:
+ xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
+ desc = crm_strdup_printf("transient attributes for node %s", uname);
+ break;
+ case controld_section_all:
+ xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
+ desc = crm_strdup_printf("all state for node %s", uname);
+ break;
+ case controld_section_all_unlocked:
+ xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
+ uname, uname, expire, uname);
+ desc = crm_strdup_printf("all state (other than shutdown locks) "
+ "for node %s", uname);
+ break;
+ }
+
+ if (cib_conn == NULL) {
+ crm_warn("Unable to delete %s: no CIB connection", desc);
+ free(desc);
+ } else {
+ int call_id;
+
+ cib__set_call_options(options, "node state deletion",
+ cib_xpath|cib_multiple);
+ call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options);
+ crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
+ desc, call_id, xpath);
+ fsa_register_cib_callback(call_id, desc, cib_delete_callback);
+ // CIB library handles freeing desc
+ }
+ free(xpath);
+}
+
+// Takes node name and resource ID
+#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \
+ "[@" XML_ATTR_UNAME "='%s']/" \
+ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
+ "/" XML_LRM_TAG_RESOURCE \
+ "[@" XML_ATTR_ID "='%s']"
+// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks
+
+/*!
+ * \internal
+ * \brief Clear resource history from CIB for a given resource and node
+ *
+ * \param[in] rsc_id ID of resource to be cleared
+ * \param[in] node Node whose resource history should be cleared
+ * \param[in] user_name ACL user name to use
+ * \param[in] call_options CIB call options
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_delete_resource_history(const char *rsc_id, const char *node,
+ const char *user_name, int call_options)
+{
+ char *desc = NULL;
+ char *xpath = NULL;
+ int rc = pcmk_rc_ok;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL);
+
+ desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node);
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("Unable to clear %s: no CIB connection", desc);
+ free(desc);
+ return ENOTCONN;
+ }
+
+ // Ask CIB to delete the entry
+ xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id);
+ rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE,
+ NULL, xpath, NULL, NULL, call_options|cib_xpath,
+ user_name);
+
+ if (rc < 0) {
+ rc = pcmk_legacy2rc(rc);
+ crm_err("Could not delete resource status of %s on %s%s%s: %s "
+ CRM_XS " rc=%d", rsc_id, node,
+ (user_name? " for user " : ""), (user_name? user_name : ""),
+ pcmk_rc_str(rc), rc);
+ free(desc);
+ free(xpath);
+ return rc;
+ }
+
+ if (pcmk_is_set(call_options, cib_sync_call)) {
+ if (pcmk_is_set(call_options, cib_dryrun)) {
+ crm_debug("Deletion of %s would succeed", desc);
+ } else {
+ crm_debug("Deletion of %s succeeded", desc);
+ }
+ free(desc);
+
+ } else {
+ crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s",
+ desc, rc, xpath);
+ fsa_register_cib_callback(rc, desc, cib_delete_callback);
+ // CIB library handles freeing desc
+ }
+
+ free(xpath);
+ return pcmk_rc_ok;
+}
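+
+/* A hypothetical usage sketch (not from the original code): a synchronous
+ * cleanup of one resource's history on one node might look like:
+ *
+ *   int rc = controld_delete_resource_history("myrsc", "node1", NULL,
+ *                                             cib_sync_call);
+ *
+ *   if (rc != pcmk_rc_ok) {
+ *       crm_warn("Cleanup failed: %s", pcmk_rc_str(rc));
+ *   }
+ */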
+
+/*!
+ * \internal
+ * \brief Build XML and string of parameters meeting some criteria, for digest
+ *
+ * \param[in] op Executor event with parameter table to use
+ * \param[in] metadata Parsed meta-data for executed resource agent
+ * \param[in] param_type Flag used for selection criteria
+ * \param[out] result Will be set to newly created XML with selected
+ * parameters as attributes
+ *
+ * \return Newly allocated space-separated string of parameter names
+ * \note Selection criteria vary by param_type: for the restart digest, we
+ * want parameters that are *not* marked reloadable (OCF 1.1) or that
+ * *are* marked unique (pre-1.1), for both string and XML results; for the
+ * secure digest, we want parameters that *are* marked private for the
+ * string, but parameters that are *not* marked private for the XML.
+ * \note It is the caller's responsibility to free the string return value with
+ * \p g_string_free() and the XML result with \p free_xml().
+ */
+static GString *
+build_parameter_list(const lrmd_event_data_t *op,
+ const struct ra_metadata_s *metadata,
+ enum ra_param_flags_e param_type, xmlNode **result)
+{
+ GString *list = NULL;
+
+ *result = create_xml_node(NULL, XML_TAG_PARAMS);
+
+ /* For the secure digest, the XML result should contain all parameters
+ * except private ones, to be consistent with what the scheduler does in
+ * calculate_secure_digest(). Private parameters are removed from the XML
+ * in the loop below.
+ */
+ if (param_type == ra_param_private
+ && compare_version(controld_globals.dc_version, "3.16.0") >= 0) {
+ g_hash_table_foreach(op->params, hash2field, *result);
+ pcmk__filter_op_for_digest(*result);
+ }
+
+ for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) {
+ struct ra_param_s *param = (struct ra_param_s *) iter->data;
+
+ bool accept_for_list = false;
+ bool accept_for_xml = false;
+
+ switch (param_type) {
+ case ra_param_reloadable:
+ accept_for_list = !pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = accept_for_list;
+ break;
+
+ case ra_param_unique:
+ accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = accept_for_list;
+ break;
+
+ case ra_param_private:
+ accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = !accept_for_list;
+ break;
+ }
+
+ if (accept_for_list) {
+ crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
+
+ if (list == NULL) {
+ // We will later search for " WORD ", so start list with a space
+ pcmk__add_word(&list, 256, " ");
+ }
+ pcmk__add_word(&list, 0, param->rap_name);
+
+ } else {
+ crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
+ }
+
+ if (accept_for_xml) {
+ const char *v = g_hash_table_lookup(op->params, param->rap_name);
+
+ if (v != NULL) {
+ crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
+ crm_xml_add(*result, param->rap_name, v);
+ }
+
+ } else {
+ crm_trace("Removing attr %s from the xml result", param->rap_name);
+ xml_remove_prop(*result, param->rap_name);
+ }
+ }
+
+ if (list != NULL) {
+ // We will later search for " WORD ", so end list with a space
+ pcmk__add_word(&list, 0, " ");
+ }
+ return list;
+}
+
+static void
+append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
+ xmlNode *update, const char *version)
+{
+ GString *list = NULL;
+ char *digest = NULL;
+ xmlNode *restart = NULL;
+
+ CRM_LOG_ASSERT(op->params != NULL);
+
+ if (op->interval_ms > 0) {
+ /* monitors are not reloadable */
+ return;
+ }
+
+ if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) {
+ // Add parameters not marked reloadable to the "op-force-restart" list
+ list = build_parameter_list(op, metadata, ra_param_reloadable,
+ &restart);
+
+ } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) {
+ /* @COMPAT pre-OCF-1.1 resource agents
+ *
+ * Before OCF 1.1, Pacemaker abused "unique=0" to indicate
+ * reloadability. Add any parameters with unique="1" to the
+ * "op-force-restart" list.
+ */
+ list = build_parameter_list(op, metadata, ra_param_unique, &restart);
+
+ } else {
+ // Resource does not support agent reloads
+ return;
+ }
+
+ digest = calculate_operation_digest(restart, version);
+ /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
+ * no matter if it actually supports any parameters with unique="1"). */
+ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART,
+ (list == NULL)? "" : (const char *) list->str);
+ crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
+
+ if ((list != NULL) && (list->len > 0)) {
+ crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
+ } else {
+ crm_trace("%s: %s", op->rsc_id, digest);
+ }
+
+ if (list != NULL) {
+ g_string_free(list, TRUE);
+ }
+ free_xml(restart);
+ free(digest);
+}
+
+static void
+append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
+ xmlNode *update, const char *version)
+{
+ GString *list = NULL;
+ char *digest = NULL;
+ xmlNode *secure = NULL;
+
+ CRM_LOG_ASSERT(op->params != NULL);
+
+ /*
+ * To keep XML_LRM_ATTR_OP_SECURE short, we want it to list only the
+ * private ("secure") parameters, while XML_LRM_ATTR_SECURE_DIGEST must be
+ * calculated from the non-private ones
+ */
+ list = build_parameter_list(op, metadata, ra_param_private, &secure);
+
+ if (list != NULL) {
+ digest = calculate_operation_digest(secure, version);
+ crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str);
+ crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
+
+ crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
+ g_string_free(list, TRUE);
+ } else {
+ crm_trace("%s: no secure parameters", op->rsc_id);
+ }
+
+ free_xml(secure);
+ free(digest);
+}
+
+/*!
+ * \internal
+ * \brief Create XML for a resource history entry
+ *
+ * \param[in] func Function name of caller
+ * \param[in,out] parent XML to add entry to
+ * \param[in] rsc Affected resource
+ * \param[in,out] op Action to add an entry for (or NULL to do nothing)
+ * \param[in] node_name Node where action occurred
+ */
+void
+controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op,
+ const char *node_name)
+{
+ int target_rc = 0;
+ xmlNode *xml_op = NULL;
+ struct ra_metadata_s *metadata = NULL;
+ const char *caller_version = NULL;
+ lrm_state_t *lrm_state = NULL;
+
+ if (op == NULL) {
+ return;
+ }
+
+ target_rc = rsc_op_expected_rc(op);
+
+ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
+ CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
+
+ xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
+ controld_globals.our_nodename, func);
+ if (xml_op == NULL) {
+ return;
+ }
+
+ if ((rsc == NULL) || (op->params == NULL)
+ || !crm_op_needs_metadata(rsc->standard, op->op_type)) {
+
+ crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
+ op->op_type, op->rsc_id, op->params, rsc);
+ return;
+ }
+
+ lrm_state = lrm_state_find(node_name);
+ if (lrm_state == NULL) {
+ crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT
+ " because we have no connection to executor for %s",
+ op->rsc_id, op->op_type, op->interval_ms, node_name);
+ return;
+ }
+
+ /* Ideally the metadata is cached, and the agent is just a fallback.
+ *
+ * @TODO Go through all callers and ensure they get metadata asynchronously
+ * first.
+ */
+ metadata = controld_get_rsc_metadata(lrm_state, rsc,
+ controld_metadata_from_agent
+ |controld_metadata_from_cache);
+ if (metadata == NULL) {
+ return;
+ }
+
+ crm_trace("Including additional digests for %s:%s:%s",
+ rsc->standard, rsc->provider, rsc->type);
+ append_restart_list(op, metadata, xml_op, caller_version);
+ append_secure_list(op, metadata, xml_op, caller_version);
+
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Record an action as pending in the CIB, if appropriate
+ *
+ * \param[in] node_name Node where the action is pending
+ * \param[in] rsc Resource that action is for
+ * \param[in,out] op Pending action
+ *
+ * \return true if action was recorded in CIB, otherwise false
+ */
+bool
+controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op)
+{
+ const char *record_pending = NULL;
+
+ CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL),
+ return false);
+
+ // Never record certain operation types as pending
+ if ((op->op_type == NULL) || (op->params == NULL)
+ || !controld_action_is_recordable(op->op_type)) {
+ return false;
+ }
+
+ // Check action's record-pending meta-attribute (defaults to true)
+ record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
+ if ((record_pending != NULL) && !crm_is_true(record_pending)) {
+ return false;
+ }
+
+ op->call_id = -1;
+ op->t_run = time(NULL);
+ op->t_rcchange = op->t_run;
+
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+ crm_debug("Recording pending %s-interval %s for %s on %s in the CIB",
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
+ node_name);
+ controld_update_resource_history(node_name, rsc, op, 0);
+ return true;
+}
+
+static void
+cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ switch (rc) {
+ case pcmk_ok:
+ case -pcmk_err_diff_failed:
+ case -pcmk_err_diff_resync:
+ crm_trace("Resource update %d complete: rc=%d", call_id, rc);
+ break;
+ default:
+ crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
+ }
+
+ if (call_id == pending_rsc_update) {
+ pending_rsc_update = 0;
+ controld_trigger_fsa();
+ }
+}
+
+/* Only successful stops, and probes that found the resource inactive, get locks
+ * recorded in the history. This ensures the resource stays locked to the node
+ * until it is active there again after the node comes back up.
+ */
+static bool
+should_preserve_lock(lrmd_event_data_t *op)
+{
+ if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ return false;
+ }
+ if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) {
+ return true;
+ }
+ if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) {
+ return true;
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Request a CIB update
+ *
+ * \param[in] section Section of CIB to update
+ * \param[in,out] data New XML of CIB section to update
+ * \param[in] options CIB call options
+ * \param[in] callback If not NULL, set this as the operation callback
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
+ * stored in \p pending_rsc_update on success.
+ */
+int
+controld_update_cib(const char *section, xmlNode *data, int options,
+ void (*callback)(xmlNode *, int, int, xmlNode *, void *))
+{
+ int cib_rc = -ENOTCONN;
+
+ CRM_ASSERT(data != NULL);
+
+ if (controld_globals.cib_conn != NULL) {
+ cib_rc = cib_internal_op(controld_globals.cib_conn,
+ PCMK__CIB_REQUEST_MODIFY, NULL, section,
+ data, NULL, options, NULL);
+ if (cib_rc >= 0) {
+ crm_debug("Submitted CIB update %d for %s section",
+ cib_rc, section);
+ }
+ }
+
+ if (callback == NULL) {
+ if (cib_rc < 0) {
+ crm_err("Failed to update CIB %s section: %s",
+ section, pcmk_rc_str(pcmk_legacy2rc(cib_rc)));
+ }
+
+ } else {
+ if ((cib_rc >= 0) && (callback == cib_rsc_callback)) {
+ /* Checking for a particular callback is a little hacky, but it
+ * didn't seem worth adding an output argument for cib_rc for just
+ * one use case.
+ */
+ pending_rsc_update = cib_rc;
+ }
+ fsa_register_cib_callback(cib_rc, NULL, callback);
+ }
+
+ return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc);
+}
+
+/*!
+ * \internal
+ * \brief Update resource history entry in CIB
+ *
+ * \param[in] node_name Node where action occurred
+ * \param[in] rsc Resource that action is for
+ * \param[in,out] op Action to record
+ * \param[in] lock_time If nonzero, when resource was locked to node
+ *
+ * \note On success, the CIB update's call ID will be stored in
+ * pending_rsc_update.
+ */
+void
+controld_update_resource_history(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, time_t lock_time)
+{
+ xmlNode *update = NULL;
+ xmlNode *xml = NULL;
+ int call_opt = crmd_cib_smart_opt();
+ const char *node_id = NULL;
+ const char *container = NULL;
+
+ CRM_CHECK((node_name != NULL) && (op != NULL), return);
+
+ if (rsc == NULL) {
+ crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
+ controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
+ return;
+ }
+
+ // <status>
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+
+ // <node_state ...>
+ xml = create_xml_node(update, XML_CIB_TAG_STATE);
+ if (pcmk__str_eq(node_name, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ node_id = controld_globals.our_uuid;
+ } else {
+ node_id = node_name;
+ pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true);
+ }
+ crm_xml_add(xml, XML_ATTR_ID, node_id);
+ crm_xml_add(xml, XML_ATTR_UNAME, node_name);
+ crm_xml_add(xml, XML_ATTR_ORIGIN, __func__);
+
+ // <lrm ...>
+ xml = create_xml_node(xml, XML_CIB_TAG_LRM);
+ crm_xml_add(xml, XML_ATTR_ID, node_id);
+
+ // <lrm_resources>
+ xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES);
+
+ // <lrm_resource ...>
+ xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE);
+ crm_xml_add(xml, XML_ATTR_ID, op->rsc_id);
+ crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard);
+ crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider);
+ crm_xml_add(xml, XML_ATTR_TYPE, rsc->type);
+ if (lock_time != 0) {
+ /* Actions on a locked resource should either preserve the lock by
+ * recording it with the action result, or clear it.
+ */
+ if (!should_preserve_lock(op)) {
+ lock_time = 0;
+ }
+ crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ (long long) lock_time);
+ }
+ if (op->params != NULL) {
+ container = g_hash_table_lookup(op->params,
+ CRM_META "_" XML_RSC_ATTR_CONTAINER);
+ if (container != NULL) {
+ crm_trace("Resource %s is a part of container resource %s",
+ op->rsc_id, container);
+ crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container);
+ }
+ }
+
+ // <lrm_resource_op ...> (possibly more than one)
+ controld_add_resource_history_xml(xml, rsc, op, node_name);
+
+ /* Update the CIB asynchronously. Even if the update fails, the resource
+ * state should be discovered during the next election. Worst case, the node
+ * is wrongly fenced for running a resource it isn't actually running.
+ */
+ crm_log_xml_trace(update, __func__);
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback);
+ free_xml(update);
+}
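+
+/* For illustration only, with hypothetical values, the update built above
+ * would resemble:
+ *
+ *   <status>
+ *     <node_state id="1" uname="node1" crm-debug-origin="...">
+ *       <lrm id="1">
+ *         <lrm_resources>
+ *           <lrm_resource id="myrsc" class="ocf" provider="heartbeat"
+ *                         type="Dummy">
+ *             <lrm_rsc_op .../>
+ *           </lrm_resource>
+ *         </lrm_resources>
+ *       </lrm>
+ *     </node_state>
+ *   </status>
+ */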
+
+/*!
+ * \internal
+ * \brief Erase an LRM history entry from the CIB, given the operation data
+ *
+ * \param[in] op Operation whose history should be deleted
+ */
+void
+controld_delete_action_history(const lrmd_event_data_t *op)
+{
+ xmlNode *xml_top = NULL;
+
+ CRM_CHECK(op != NULL, return);
+
+ xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
+ crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
+ crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
+
+ if (op->interval_ms > 0) {
+ char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
+
+ /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
+ crm_xml_add(xml_top, XML_ATTR_ID, op_id);
+ free(op_id);
+ }
+
+ crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)",
+ op->rsc_id, op->op_type, op->interval_ms, op->call_id);
+
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn,
+ XML_CIB_TAG_STATUS, xml_top,
+ cib_none);
+
+ crm_log_xml_trace(xml_top, "op:cancel");
+ free_xml(xml_top);
+}
+
+/* Define xpath to find LRM resource history entry by node and resource */
+#define XPATH_HISTORY \
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
+ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
+ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
+ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
+ "/" XML_LRM_TAG_RSC_OP
+
+/* ... and also by operation key */
+#define XPATH_HISTORY_ID XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s']"
+
+/* ... and also by operation key and operation call ID */
+#define XPATH_HISTORY_CALL XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
+
+/* ... and also by operation key and original operation key */
+#define XPATH_HISTORY_ORIG XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
+
+/*!
+ * \internal
+ * \brief Delete a last_failure resource history entry from the CIB
+ *
+ * \param[in] rsc_id Name of resource to clear history for
+ * \param[in] node Name of node to clear history for
+ * \param[in] action If specified, delete only if this was the failed action
+ * \param[in] interval_ms If \p action is specified, it has this interval
+ */
+void
+controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+ const char *action, guint interval_ms)
+{
+ char *xpath = NULL;
+ char *last_failure_key = NULL;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL), return);
+
+ // Generate XPath to match desired entry
+ last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
+ if (action == NULL) {
+ xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id,
+ last_failure_key);
+ } else {
+ char *action_key = pcmk__op_key(rsc_id, action, interval_ms);
+
+ xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id,
+ last_failure_key, action_key);
+ free(action_key);
+ }
+ free(last_failure_key);
+
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+ NULL, cib_xpath);
+ free(xpath);
+}
+
+/*!
+ * \internal
+ * \brief Delete resource history entry from the CIB, given operation key
+ *
+ * \param[in] rsc_id Name of resource to clear history for
+ * \param[in] node Name of node to clear history for
+ * \param[in] key Operation key of operation to clear history for
+ * \param[in] call_id If specified, delete entry only if it has this call ID
+ */
+void
+controld_delete_action_history_by_key(const char *rsc_id, const char *node,
+ const char *key, int call_id)
+{
+ char *xpath = NULL;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return);
+
+ if (call_id > 0) {
+ xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
+ call_id);
+ } else {
+ xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
+ }
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+ NULL, cib_xpath);
+ free(xpath);
+}
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
new file mode 100644
index 0000000..bd9492a
--- /dev/null
+++ b/daemons/controld/controld_cib.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__CONTROLD_CIB__H
+#define PCMK__CONTROLD_CIB__H
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/common/xml.h>
+#include <crm/cib/internal.h> // PCMK__CIB_REQUEST_MODIFY
+#include "controld_globals.h" // controld_globals.cib_conn
+
+static inline void
+fsa_cib_anon_update(const char *section, xmlNode *data) {
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("No CIB connection available");
+ } else {
+ controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ section, data,
+ cib_scope_local|cib_can_create);
+ }
+}
+
+static inline void
+fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("No CIB connection available");
+ } else {
+ controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ section, data,
+ cib_scope_local
+ |cib_can_create
+ |cib_discard_reply);
+ }
+}
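+
+/* A hypothetical usage sketch (not from the original code): pushing a locally
+ * built status update without waiting for the reply might look like:
+ *
+ *   fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update);
+ */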
+
+void controld_record_cib_replace_call(int call_id);
+bool controld_forget_cib_replace_call(int call_id);
+void controld_forget_all_cib_replace_calls(void);
+void controld_destroy_cib_replacements_table(void);
+
+int controld_update_cib(const char *section, xmlNode *data, int options,
+ void (*callback)(xmlNode *, int, int, xmlNode *,
+ void *));
+unsigned int cib_op_timeout(void);
+
+// Subsections of node_state
+enum controld_section_e {
+ controld_section_lrm,
+ controld_section_lrm_unlocked,
+ controld_section_attrs,
+ controld_section_all,
+ controld_section_all_unlocked
+};
+
+void controld_delete_node_state(const char *uname,
+ enum controld_section_e section, int options);
+int controld_delete_resource_history(const char *rsc_id, const char *node,
+ const char *user_name, int call_options);
+
+/* Convenience macro for registering a CIB callback
+ * (assumes that data can be freed with free())
+ */
+# define fsa_register_cib_callback(id, data, fn) do { \
+ cib_t *cib_conn = controld_globals.cib_conn; \
+ \
+ CRM_ASSERT(cib_conn != NULL); \
+ cib_conn->cmds->register_callback_full(cib_conn, id, cib_op_timeout(), \
+ FALSE, data, #fn, fn, free); \
+ } while(0)
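+
+/* A hypothetical usage sketch (not from the original code): register a
+ * completion handler for an asynchronous CIB call, passing a string that the
+ * CIB library will free() once the callback has run:
+ *
+ *   fsa_register_cib_callback(call_id, strdup("some context"), my_callback);
+ */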
+
+void controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op,
+ const char *node_name);
+
+#define controld_add_resource_history_xml(parent, rsc, op, node_name) \
+ controld_add_resource_history_xml_as(__func__, (parent), (rsc), \
+ (op), (node_name))
+
+bool controld_record_pending_op(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op);
+
+void controld_update_resource_history(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, time_t lock_time);
+
+void controld_delete_action_history(const lrmd_event_data_t *op);
+
+void controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+ const char *action, guint interval_ms);
+
+void controld_delete_action_history_by_key(const char *rsc_id, const char *node,
+ const char *key, int call_id);
+
+void controld_disconnect_cib_manager(void);
+
+int crmd_cib_smart_opt(void);
+
+/*!
+ * \internal
+ * \brief Check whether an action type should be recorded in the CIB
+ *
+ * \param[in] action Action type
+ *
+ * \return true if action should be recorded, false otherwise
+ */
+static inline bool
+controld_action_is_recordable(const char *action)
+{
+ return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE,
+ CRMD_ACTION_NOTIFY, CRMD_ACTION_METADATA, NULL);
+}
+
+#endif // PCMK__CONTROLD_CIB__H
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
new file mode 100644
index 0000000..ffc62a0
--- /dev/null
+++ b/daemons/controld/controld_control.c
@@ -0,0 +1,857 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+static qb_ipcs_service_t *ipcs = NULL;
+
+static crm_trigger_t *config_read_trigger = NULL;
+
+#if SUPPORT_COROSYNC
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+#endif
+
+void crm_shutdown(int nsig);
+static gboolean crm_read_options(gpointer user_data);
+
+/* A_HA_CONNECT */
+void
+do_ha_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean registered = FALSE;
+ static crm_cluster_t *cluster = NULL;
+
+ if (cluster == NULL) {
+ cluster = pcmk_cluster_new();
+ }
+
+ if (action & A_HA_DISCONNECT) {
+ crm_cluster_disconnect(cluster);
+ crm_info("Disconnected from the cluster");
+
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+ }
+
+ if (action & A_HA_CONNECT) {
+ crm_set_status_callback(&peer_update_callback);
+ crm_set_autoreap(FALSE);
+
+#if SUPPORT_COROSYNC
+ if (is_corosync_cluster()) {
+ registered = crm_connect_corosync(cluster);
+ }
+#endif // SUPPORT_COROSYNC
+
+ if (registered) {
+ controld_election_init(cluster->uname);
+ controld_globals.our_nodename = cluster->uname;
+ controld_globals.our_uuid = cluster->uuid;
+ if(cluster->uuid == NULL) {
+ crm_err("Could not obtain local uuid");
+ registered = FALSE;
+ }
+ }
+
+ if (!registered) {
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ return;
+ }
+
+ populate_cib_nodes(node_update_none, __func__);
+ controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
+ crm_info("Connected to the cluster");
+ }
+
+ if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
+ crm_err("Unexpected action %s in %s", fsa_action2string(action),
+ __func__);
+ }
+}
+
+/* A_SHUTDOWN */
+void
+do_shutdown(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ /* just in case */
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ controld_disconnect_fencer(FALSE);
+}
+
+/* A_SHUTDOWN_REQ */
+void
+do_shutdown_req(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *msg = NULL;
+
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ //controld_set_fsa_input_flags(R_STAYDOWN);
+ crm_info("Sending shutdown request to all peers (DC is %s)",
+ pcmk__s(controld_globals.dc_name, "not set"));
+ msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+ free_xml(msg);
+}
+
+void
+crmd_fast_exit(crm_exit_t exit_code)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
+ crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
+ exit_code, CRM_EX_FATAL);
+ exit_code = CRM_EX_FATAL;
+
+ } else if ((exit_code == CRM_EX_OK)
+ && pcmk_is_set(controld_globals.fsa_input_register,
+ R_IN_RECOVERY)) {
+ crm_err("Could not recover from internal error");
+ exit_code = CRM_EX_ERROR;
+ }
+
+ if (controld_globals.logger_out != NULL) {
+ controld_globals.logger_out->finish(controld_globals.logger_out,
+ exit_code, true, NULL);
+ pcmk__output_free(controld_globals.logger_out);
+ controld_globals.logger_out = NULL;
+ }
+
+ crm_exit(exit_code);
+}
+
+crm_exit_t
+crmd_exit(crm_exit_t exit_code)
+{
+ GMainLoop *mloop = controld_globals.mainloop;
+
+ static bool in_progress = FALSE;
+
+ if (in_progress && (exit_code == CRM_EX_OK)) {
+ crm_debug("Exit is already in progress");
+ return exit_code;
+
+ } else if(in_progress) {
+ crm_notice("Error during shutdown process, exiting now with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ crm_write_blackbox(SIGTRAP, NULL);
+ crmd_fast_exit(exit_code);
+ }
+
+ in_progress = TRUE;
+ crm_trace("Preparing to exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+
+ /* Suppress secondary errors resulting from us disconnecting everything */
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+
+/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
+
+ if(ipcs) {
+ crm_trace("Closing IPC server");
+ mainloop_del_ipc_server(ipcs);
+ ipcs = NULL;
+ }
+
+ controld_close_attrd_ipc();
+ controld_shutdown_schedulerd_ipc();
+ controld_disconnect_fencer(TRUE);
+
+ if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
+ crm_debug("No mainloop detected");
+ exit_code = CRM_EX_ERROR;
+ }
+
+ /* On an error, just get out.
+ *
+ * Otherwise, make the effort to have the mainloop exit gracefully so that
+ * it (mostly) cleans up after itself and valgrind has less to report on,
+ * allowing real errors to stand out.
+ */
+ if (exit_code != CRM_EX_OK) {
+ crm_notice("Forcing immediate exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ crm_write_blackbox(SIGTRAP, NULL);
+ crmd_fast_exit(exit_code);
+ }
+
+/* Clean up as much memory as possible for valgrind */
+
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
+
+ crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
+ fsa_input2string(fsa_data->fsa_input),
+ fsa_state2string(controld_globals.fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ delete_fsa_input(fsa_data);
+ }
+
+ controld_clear_fsa_input_flags(R_MEMBERSHIP);
+
+ g_list_free(controld_globals.fsa_message_queue);
+ controld_globals.fsa_message_queue = NULL;
+
+ controld_election_fini();
+
+ /* Tear down the CIB manager connection, but don't free it yet -- it could
+ * be used when we drain the mainloop later.
+ */
+
+ controld_disconnect_cib_manager();
+
+ verify_stopped(controld_globals.fsa_state, LOG_WARNING);
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+ lrm_state_destroy_all();
+
+ mainloop_destroy_trigger(config_read_trigger);
+ config_read_trigger = NULL;
+
+ controld_destroy_fsa_trigger();
+ controld_destroy_transition_trigger();
+
+ pcmk__client_cleanup();
+ crm_peer_destroy();
+
+ controld_free_fsa_timers();
+ te_cleanup_stonith_history_sync(NULL, TRUE);
+ controld_free_sched_timer();
+
+ free(controld_globals.our_nodename);
+ controld_globals.our_nodename = NULL;
+
+ free(controld_globals.our_uuid);
+ controld_globals.our_uuid = NULL;
+
+ free(controld_globals.dc_name);
+ controld_globals.dc_name = NULL;
+
+ free(controld_globals.dc_version);
+ controld_globals.dc_version = NULL;
+
+ free(controld_globals.cluster_name);
+ controld_globals.cluster_name = NULL;
+
+ free(controld_globals.te_uuid);
+ controld_globals.te_uuid = NULL;
+
+ free_max_generation();
+ controld_destroy_cib_replacements_table();
+ controld_destroy_failed_sync_table();
+ controld_destroy_outside_events_table();
+
+ mainloop_destroy_signal(SIGPIPE);
+ mainloop_destroy_signal(SIGUSR1);
+ mainloop_destroy_signal(SIGTERM);
+ mainloop_destroy_signal(SIGTRAP);
+ /* leave SIGCHLD engaged as we might still want to drain some service-actions */
+
+ if (mloop) {
+ GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
+
+ /* Don't re-enter this block */
+ controld_globals.mainloop = NULL;
+
+ /* no signals on final draining anymore */
+ mainloop_destroy_signal(SIGCHLD);
+
+ crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
+
+ {
+ int lpc = 0;
+
+ while((g_main_context_pending(ctx) && lpc < 10)) {
+ lpc++;
+ crm_trace("Iteration %d", lpc);
+ g_main_context_dispatch(ctx);
+ }
+ }
+
+ crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
+ g_main_loop_quit(mloop);
+
+ /* Won't do anything yet, since we're inside it now */
+ g_main_loop_unref(mloop);
+ } else {
+ mainloop_destroy_signal(SIGCHLD);
+ }
+
+ cib_delete(controld_globals.cib_conn);
+ controld_globals.cib_conn = NULL;
+
+ throttle_fini();
+
+ /* Graceful */
+ crm_trace("Done preparing for exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ return exit_code;
+}
+
+/* A_EXIT_0, A_EXIT_1 */
+void
+do_exit(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_exit_t exit_code = CRM_EX_OK;
+ int log_level = LOG_INFO;
+ const char *exit_type = "gracefully";
+
+ if (action & A_EXIT_1) {
+ log_level = LOG_ERR;
+ exit_type = "forcefully";
+ exit_code = CRM_EX_ERROR;
+ }
+
+ verify_stopped(cur_state, LOG_ERR);
+ do_crm_log(log_level, "Performing %s - %s exiting the controller",
+ fsa_action2string(action), exit_type);
+
+ crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
+ crmd_exit(exit_code);
+}
+
+static void sigpipe_ignore(int nsig) { return; }
+
+/* A_STARTUP */
+void
+do_startup(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_debug("Registering Signal Handlers");
+ mainloop_add_signal(SIGTERM, crm_shutdown);
+ mainloop_add_signal(SIGPIPE, sigpipe_ignore);
+
+ config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
+ crm_read_options, NULL);
+
+ controld_init_fsa_trigger();
+ controld_init_transition_trigger();
+
+ crm_debug("Creating CIB manager and executor objects");
+ controld_globals.cib_conn = cib_new();
+
+ lrm_state_init_local();
+ if (controld_init_fsa_timers() == FALSE) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
+{
+ crm_trace("Accepting new IPC client connection");
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
+
+ if (msg == NULL) {
+ pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+ }
+ pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
+
+ CRM_ASSERT(client->user != NULL);
+ pcmk__update_acl_user(msg, F_CRM_USER, client->user);
+
+ crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
+ if (controld_authorize_ipc_message(msg, client, NULL)) {
+ crm_trace("Processing IPC message from client %s",
+ pcmk__client_name(client));
+ route_message(C_IPC_MESSAGE, msg);
+ }
+
+ controld_trigger_fsa();
+ free_xml(msg);
+ return 0;
+}
+
+static int32_t
+ipc_client_disconnected(qb_ipcs_connection_t *c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client) {
+ crm_trace("Disconnecting %sregistered client %s (%p/%p)",
+ (client->userdata? "" : "un"), pcmk__client_name(client),
+ c, client);
+ free(client->userdata);
+ pcmk__free_client(client);
+ controld_trigger_fsa();
+ }
+ return 0;
+}
+
+static void
+ipc_connection_destroyed(qb_ipcs_connection_t *c)
+{
+ crm_trace("Connection %p", c);
+ ipc_client_disconnected(c);
+}
+
+/* A_STOP */
+void
+do_stop(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_trace("Closing IPC server");
+ mainloop_del_ipc_server(ipcs); ipcs = NULL;
+ register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
+}
+
+/* A_STARTED */
+void
+do_started(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ static struct qb_ipcs_service_handlers crmd_callbacks = {
+ .connection_accept = accept_controller_client,
+ .connection_created = NULL,
+ .msg_process = dispatch_controller_ipc,
+ .connection_closed = ipc_client_disconnected,
+ .connection_destroyed = ipc_connection_destroyed
+ };
+
+ if (cur_state != S_STARTING) {
+ crm_err("Start cancelled... %s", fsa_state2string(cur_state));
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_MEMBERSHIP)) {
+ crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_LRM_CONNECTED)) {
+ crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_CIB_CONNECTED)) {
+ crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_READ_CONFIG)) {
+ crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
+
+ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
+ crmd_fsa_stall(TRUE);
+ return;
+ }
+
+ crm_debug("Init server comms");
+ ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
+ if (ipcs == NULL) {
+ crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ } else {
+ crm_notice("Pacemaker controller successfully started and accepting connections");
+ }
+ controld_trigger_fencer_connect();
+
+ controld_clear_fsa_input_flags(R_STARTING);
+ register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
+}
+
+/* A_RECOVER */
+void
+do_recover(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ controld_set_fsa_input_flags(R_IN_RECOVERY);
+ crm_warn("Fast-tracking shutdown in response to errors");
+
+ register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
+}
+
+static pcmk__cluster_option_t controller_options[] = {
+ /* name, old name, type, allowed values,
+ * default value, validator,
+ * short description,
+ * long description
+ */
+ {
+ "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
+ N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
+ N_("Includes a hash which identifies the exact changeset the code was "
+ "built from. Used for diagnostic purposes.")
+ },
+ {
+ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
+ N_("The messaging stack on which Pacemaker is currently running"),
+ N_("Used for informational and diagnostic purposes.")
+ },
+ {
+ "cluster-name", NULL, "string", NULL, NULL, NULL,
+ N_("An arbitrary name for the cluster"),
+ N_("This optional value is mostly for users' convenience as desired "
+ "in administration, but may also be used in Pacemaker "
+ "configuration rules via the #cluster-name node attribute, and "
+ "by higher-level tools and resource agents.")
+ },
+ {
+ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
+ NULL, "20s", pcmk__valid_interval_spec,
+ N_("How long to wait for a response from other nodes during start-up"),
+ N_("The optimal value will depend on the speed and load of your network "
+ "and the type of switches used.")
+ },
+ {
+ XML_CONFIG_ATTR_RECHECK, NULL, "time",
+ N_("Zero disables polling, while positive values are an interval in seconds"
+ "(unless other units are specified, for example \"5min\")"),
+ "15min", pcmk__valid_interval_spec,
+ N_("Polling interval to recheck cluster state and evaluate rules "
+ "with date specifications"),
+ N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
+ "recheck cluster state for failure timeouts and most time-based "
+ "rules. However, it will also recheck the cluster after this "
+ "amount of inactivity, to evaluate rules with date specifications "
+ "and serve as a fail-safe for certain types of scheduler bugs.")
+ },
+ {
+ "load-threshold", NULL, "percentage", NULL,
+ "80%", pcmk__valid_percentage,
+ N_("Maximum amount of system load that should be used by cluster nodes"),
+ N_("The cluster will slow down its recovery process when the amount of "
+ "system resources used (currently CPU) approaches this limit"),
+ },
+ {
+ "node-action-limit", NULL, "integer", NULL,
+ "0", pcmk__valid_number,
+ N_("Maximum number of jobs that can be scheduled per node "
+ "(defaults to 2x cores)")
+ },
+ { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
+ N_("How a cluster node should react if notified of its own fencing"),
+ N_("A cluster node may receive notification of its own fencing if fencing "
+ "is misconfigured, or if fabric fencing is in use that doesn't cut "
+ "cluster communication. Allowed values are \"stop\" to attempt to "
+ "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
+ "to immediately reboot the local node, falling back to stop on failure.")
+ },
+ {
+ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
+ "2min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("Declare an election failed if it is not decided within this much "
+ "time. If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
+ "20min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("Exit immediately if shutdown does not complete within this much "
+ "time. If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
+ "3min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
+ "30min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "transition-delay", "crmd-transition-delay", "time", NULL,
+ "0s", pcmk__valid_interval_spec,
+ N_("*** Advanced Use Only *** Enabling this option will slow down "
+ "cluster recovery under all conditions"),
+ N_("Delay cluster recovery for this much time to allow for additional "
+ "events to occur. Useful if your configuration is sensitive to "
+ "the order in which ping updates arrive.")
+ },
+ {
+ "stonith-watchdog-timeout", NULL, "time", NULL,
+ "0", controld_verify_stonith_watchdog_timeout,
+ N_("How long before nodes can be assumed to be safely down when "
+ "watchdog-based self-fencing via SBD is in use"),
+ N_("If this is set to a positive value, lost nodes are assumed to "
+ "self-fence using watchdog-based SBD within this much time. This "
+ "does not require a fencing resource to be explicitly configured, "
+ "though a fence_watchdog resource can be configured, to limit use "
+ "to specific nodes. If this is set to 0 (the default), the cluster "
+ "will never assume watchdog-based self-fencing. If this is set to a "
+ "negative value, the cluster will use twice the local value of the "
+ "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
+ "or otherwise treat this as 0. WARNING: When used, this timeout "
+ "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
+ "watchdog-based SBD, and Pacemaker will refuse to start on any of "
+ "those nodes where this is not true for the local value or SBD is "
+ "not active. When this is set to a negative value, "
+ "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
+ "that use SBD, otherwise data corruption or loss could occur.")
+ },
+ {
+ "stonith-max-attempts", NULL, "integer", NULL,
+ "10", pcmk__valid_positive_number,
+ N_("How many times fencing can fail before it will no longer be "
+ "immediately re-attempted on a target")
+ },
+
+ // Already documented in libpe_status (other values must be kept identical)
+ {
+ "no-quorum-policy", NULL, "select",
+ "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
+ N_("What to do when the cluster does not have quorum"), NULL
+ },
+ {
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
+ "false", pcmk__valid_boolean,
+ N_("Whether to lock resources to a cleanly shut down node"),
+ N_("When true, resources active on a node when it is cleanly shut down "
+ "are kept \"locked\" to that node (not allowed to run elsewhere) "
+ "until they start again on that node after it rejoins (or for at "
+ "most shutdown-lock-limit, if set). Stonith resources and "
+ "Pacemaker Remote connections are never locked. Clone and bundle "
+ "instances and the promoted role of promotable clones are "
+ "currently never locked, though support could be added in a future "
+ "release.")
+ },
+ {
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
+ "0", pcmk__valid_interval_spec,
+ N_("Do not lock resources to a cleanly shut down node longer than "
+ "this"),
+ N_("If shutdown-lock is true and this is set to a nonzero time "
+ "duration, shutdown locks will expire after this much time has "
+ "passed since the shutdown was initiated, even if the node has not "
+ "rejoined.")
+ },
+};
+
+void
+crmd_metadata(void)
+{
+ const char *desc_short = "Pacemaker controller options";
+ const char *desc_long = "Cluster options used by Pacemaker's controller";
+
+ gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
+ desc_long, controller_options,
+ PCMK__NELEM(controller_options));
+ printf("%s", s);
+ g_free(s);
+}
+
+static void
+config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ const char *value = NULL;
+ GHashTable *config_hash = NULL;
+ crm_time_t *now = crm_time_new(NULL);
+ xmlNode *crmconfig = NULL;
+ xmlNode *alerts = NULL;
+
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
+ crm_err("The cluster is mis-configured - shutting down and staying down");
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ }
+ goto bail;
+ }
+
+ crmconfig = output;
+ if ((crmconfig) &&
+ (crm_element_name(crmconfig)) &&
+ (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
+ crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
+ }
+ if (!crmconfig) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ goto bail;
+ }
+
+ crm_debug("Call %d : Parsing CIB options", call_id);
+ config_hash = pcmk__strkey_table(free, free);
+ pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
+ config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
+
+ // Validate all options, and use defaults if not already present in hash
+ pcmk__validate_cluster_options(config_hash, controller_options,
+ PCMK__NELEM(controller_options));
+
+ value = g_hash_table_lookup(config_hash, "no-quorum-policy");
+ if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
+ controld_set_global_flags(controld_no_quorum_suicide);
+ }
+
+ value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
+ if (crm_is_true(value)) {
+ controld_set_global_flags(controld_shutdown_lock_enabled);
+ } else {
+ controld_clear_global_flags(controld_shutdown_lock_enabled);
+ }
+
+ value = g_hash_table_lookup(config_hash,
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
+ controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
+ / 1000;
+
+ value = g_hash_table_lookup(config_hash, "cluster-name");
+ pcmk__str_update(&(controld_globals.cluster_name), value);
+
+ // Let subcomponents initialize their own static variables
+ controld_configure_election(config_hash);
+ controld_configure_fencing(config_hash);
+ controld_configure_fsa_timers(config_hash);
+ controld_configure_throttle(config_hash);
+
+ alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
+ crmd_unpack_alerts(alerts);
+
+ controld_set_fsa_input_flags(R_READ_CONFIG);
+ controld_trigger_fsa();
+
+ g_hash_table_destroy(config_hash);
+ bail:
+ crm_time_free(now);
+}
+
+/*!
+ * \internal
+ * \brief Trigger read and processing of the configuration
+ *
+ * \param[in] fn Calling function name
+ * \param[in] line Line number where call occurred
+ */
+void
+controld_trigger_config_as(const char *fn, int line)
+{
+ if (config_read_trigger != NULL) {
+ crm_trace("%s:%d - Triggered config processing", fn, line);
+ mainloop_set_trigger(config_read_trigger);
+ }
+}
+
+gboolean
+crm_read_options(gpointer user_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+ int call_id = cib_conn->cmds->query(cib_conn,
+ "//" XML_CIB_TAG_CRMCONFIG
+ " | //" XML_CIB_TAG_ALERTS,
+ NULL, cib_xpath|cib_scope_local);
+
+ fsa_register_cib_callback(call_id, NULL, config_query_callback);
+ crm_trace("Querying the CIB... call %d", call_id);
+ return TRUE;
+}
+
+/* A_READCONFIG */
+void
+do_read_config(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ throttle_init();
+ controld_trigger_config();
+}
+
+void
+crm_shutdown(int nsig)
+{
+ const char *value = NULL;
+ guint default_period_ms = 0;
+
+ if ((controld_globals.mainloop == NULL)
+ || !g_main_loop_is_running(controld_globals.mainloop)) {
+ crmd_exit(CRM_EX_OK);
+ return;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Escalating shutdown");
+ register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
+ return;
+ }
+
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+
+ /* If shutdown timer doesn't have a period set, use the default
+ *
+ * @TODO: Evaluate whether this is still necessary. As long as
+ * config_query_callback() has been run at least once, it doesn't look like
+ * anything could have changed the timer period since then.
+ */
+ value = pcmk__cluster_option(NULL, controller_options,
+ PCMK__NELEM(controller_options),
+ XML_CONFIG_ATTR_FORCE_QUIT);
+ default_period_ms = crm_parse_interval_spec(value);
+ controld_shutdown_start_countdown(default_period_ms);
+}
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
new file mode 100644
index 0000000..4378b30
--- /dev/null
+++ b/daemons/controld/controld_corosync.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#if SUPPORT_COROSYNC
+
+extern void post_cache_update(int seq);
+
+/* A_HA_CONNECT */
+
+static void
+crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+ if(data == NULL) {
+ return;
+ }
+ if (kind == crm_class_cluster) {
+ crm_node_t *peer = NULL;
+ xmlNode *xml = string2xml(data);
+
+ if (xml == NULL) {
+ crm_err("Could not parse message content (%d): %.100s", kind, data);
+ free(data);
+ return;
+ }
+
+ crm_xml_add(xml, F_ORIG, from);
+ /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */
+
+ peer = crm_get_peer(0, from);
+ if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
+ /* If we can still talk to our peer process on that node,
+ * then it must be part of the corosync membership
+ */
+ crm_warn("Receiving messages from a node we think is dead: %s[%d]",
+ peer->uname, peer->id);
+ crm_update_peer_proc(__func__, peer, crm_proc_cpg,
+ ONLINESTATUS);
+ }
+ crmd_ha_msg_filter(xml);
+ free_xml(xml);
+ } else {
+ crm_err("Invalid message class (%d): %.100s", kind, data);
+ }
+ free(data);
+}
+
+static gboolean
+crmd_quorum_callback(unsigned long long seq, gboolean quorate)
+{
+ crm_update_quorum(quorate, FALSE);
+ post_cache_update(seq);
+ return TRUE;
+}
+
+static void
+crmd_cs_destroy(gpointer user_data)
+{
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
+ crm_crit("Lost connection to cluster layer, shutting down");
+ crmd_exit(CRM_EX_DISCONNECT);
+
+ } else {
+ crm_info("Corosync connection closed");
+ }
+}
+
+/*!
+ * \brief Handle a Corosync notification of a CPG configuration change
+ *
+ * \param[in] handle CPG connection
+ * \param[in] cpg_name CPG group name
+ * \param[in] member_list List of current CPG members
+ * \param[in] member_list_entries Number of entries in \p member_list
+ * \param[in] left_list List of CPG members that left
+ * \param[in] left_list_entries Number of entries in \p left_list
+ * \param[in] joined_list List of CPG members that joined
+ * \param[in] joined_list_entries Number of entries in \p joined_list
+ */
+static void
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ /* When nodes leave CPG, the DC clears their transient node attributes.
+ *
+ * However if there is no DC, or the DC is among the nodes that left, each
+ * remaining node needs to do the clearing, to ensure it gets done.
+ * Otherwise, the attributes would persist when the nodes rejoin, which
+ * could have serious consequences for unfencing, agents that use attributes
+ * for internal logic, etc.
+ *
+ * Here, we set a global boolean if the DC is among the nodes that left, for
+ * use by the peer callback.
+ */
+ if (controld_globals.dc_name != NULL) {
+ crm_node_t *peer = NULL;
+
+ peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name);
+ if (peer != NULL) {
+ for (int i = 0; i < left_list_entries; ++i) {
+ if (left_list[i].nodeid == peer->id) {
+ controld_set_global_flags(controld_dc_left);
+ break;
+ }
+ }
+ }
+ }
+
+ // Process the change normally, which will call the peer callback as needed
+ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
+ controld_clear_global_flags(controld_dc_left);
+}
+
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+
+gboolean
+crm_connect_corosync(crm_cluster_t * cluster)
+{
+ if (is_corosync_cluster()) {
+ crm_set_status_callback(&peer_update_callback);
+ cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
+ cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
+ cluster->destroy = crmd_cs_destroy;
+
+ if (crm_cluster_connect(cluster)) {
+ pcmk__corosync_quorum_connect(crmd_quorum_callback,
+ crmd_cs_destroy);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+#endif
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
new file mode 100644
index 0000000..5f33d5b
--- /dev/null
+++ b/daemons/controld/controld_election.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/crm.h>
+
+#include <pacemaker-controld.h>
+
+static election_t *fsa_election = NULL;
+
+static gboolean
+election_win_cb(gpointer data)
+{
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
+ return FALSE;
+}
+
+void
+controld_election_init(const char *uname)
+{
+ fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
+}
+
+/*!
+ * \internal
+ * \brief Configure election options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_election(GHashTable *options)
+{
+ const char *value = NULL;
+
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_ELECTION_FAIL);
+ election_timeout_set_period(fsa_election, crm_parse_interval_spec(value));
+}
+
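+/*!
+ * \internal
+ * \brief Stop tracking a lost peer as an election voter
+ *
+ * \param[in] uname  Name of peer to remove from the election
+ */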
+void
+controld_remove_voter(const char *uname)
+{
+ election_remove(fsa_election, uname);
+
+ if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) {
+ /* Clear any election dampening in effect. Otherwise, if the lost DC had
+ * just won, an immediate new election could fizzle out with no new DC.
+ */
+ election_clear_dampening(fsa_election);
+ }
+}
+
+void
+controld_election_fini(void)
+{
+ election_fini(fsa_election);
+ fsa_election = NULL;
+}
+
+void
+controld_stop_current_election_timeout(void)
+{
+ election_timeout_stop(fsa_election);
+}
+
+/* A_ELECTION_VOTE */
+void
+do_election_vote(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean not_voting = FALSE;
+
+ /* don't vote if we're in one of these states or wanting to shut down */
+ switch (cur_state) {
+ case S_STARTING:
+ case S_RECOVERY:
+ case S_STOPPING:
+ case S_TERMINATE:
+ crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
+ not_voting = TRUE;
+ break;
+ case S_ELECTION:
+ case S_INTEGRATION:
+ case S_RELEASE_DC:
+ break;
+ default:
+ crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
+ break;
+ }
+
+ if (not_voting == FALSE) {
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
+ not_voting = TRUE;
+ }
+ }
+
+ if (not_voting) {
+ if (AM_I_DC) {
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+
+ } else {
+ register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+ }
+ return;
+ }
+
+    election_vote(fsa_election);
+}
+
+void
+do_election_check(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (controld_globals.fsa_state == S_ELECTION) {
+ election_check(fsa_election);
+ } else {
+ crm_debug("Ignoring election check because we are not in an election");
+ }
+}
+
+/* A_ELECTION_COUNT */
+void
+do_election_count_vote(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ enum election_result rc = 0;
+ ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
+
+    if (crm_peer_cache == NULL) {
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Internal error, no peer cache");
+ }
+ return;
+ }
+
+ rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
+    switch (rc) {
+ case election_start:
+ election_reset(fsa_election);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ break;
+
+ case election_lost:
+ update_dc(NULL);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+ cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
+
+ } else if (cur_state != S_STARTING) {
+ register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+ }
+ break;
+
+ default:
+ crm_trace("Election message resulted in state %d", rc);
+ }
+}
+
+static void
+feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_notice("Feature update failed: %s "CRM_XS" rc=%d",
+ pcmk_strerror(rc), rc);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update a node attribute in the CIB during a DC takeover
+ *
+ * \param[in] name Name of attribute to update
+ * \param[in] value New attribute value
+ */
+#define dc_takeover_update_attr(name, value) do { \
+ cib__update_node_attr(controld_globals.logger_out, \
+ controld_globals.cib_conn, cib_none, \
+ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \
+ name, value, NULL, NULL); \
+ } while (0)
+
+/* A_DC_TAKEOVER */
+void
+do_dc_takeover(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *cib = NULL;
+ const char *cluster_type = name_for_cluster_type(get_cluster_type());
+ pid_t watchdog = pcmk__locate_sbd();
+
+ crm_info("Taking over DC status for this partition");
+ controld_set_fsa_input_flags(R_THE_DC);
+ execute_stonith_cleanup();
+
+ election_reset(fsa_election);
+ controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE);
+
+ controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn,
+ cib_scope_local);
+
+ cib = create_xml_node(NULL, XML_TAG_CIB);
+ crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback);
+
+ dc_takeover_update_attr(XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
+ dc_takeover_update_attr("dc-version", PACEMAKER_VERSION "-" BUILD_VERSION);
+ dc_takeover_update_attr("cluster-infrastructure", cluster_type);
+
+#if SUPPORT_COROSYNC
+ if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) {
+ char *cluster_name = pcmk__corosync_cluster_name();
+
+ if (cluster_name != NULL) {
+ dc_takeover_update_attr("cluster-name", cluster_name);
+ }
+ free(cluster_name);
+ }
+#endif
+
+ controld_trigger_config();
+ free_xml(cib);
+}
+
+/* A_DC_RELEASE */
+void
+do_dc_release(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (action & A_DC_RELEASE) {
+ crm_debug("Releasing the role of DC");
+ controld_clear_fsa_input_flags(R_THE_DC);
+ controld_expect_sched_reply(NULL);
+
+ } else if (action & A_DC_RELEASED) {
+ crm_info("DC role released");
+#if 0
+ if (are there errors) {
+ /* we can't stay up if not healthy */
+ /* or perhaps I_ERROR and go to S_RECOVER? */
+ result = I_SHUTDOWN;
+ }
+#endif
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ xmlNode *update = NULL;
+ crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename);
+
+ pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
+ update = create_node_state_update(node, node_update_expected, NULL,
+ __func__);
+ /* Don't need a based response because controld will stop. */
+ fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update);
+ free_xml(update);
+ }
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
+
+ } else {
+ crm_err("Unknown DC action %s", fsa_action2string(action));
+ }
+
+ crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
+}
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
new file mode 100644
index 0000000..0de399c
--- /dev/null
+++ b/daemons/controld/controld_execd.c
@@ -0,0 +1,2433 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <regex.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
+#include <crm/services.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+#define START_DELAY_THRESHOLD (5 * 60 * 1000)
+#define MAX_LRM_REG_FAILS 30
+
+struct delete_event_s {
+ int rc;
+ const char *rsc;
+ lrm_state_t *lrm_state;
+};
+
+static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
+static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
+static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
+
+static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
+ const xmlNode *rsc_op,
+ const char *rsc_id,
+ const char *operation);
+static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
+ xmlNode *msg, struct ra_metadata_s *md);
+
+static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
+ int log_level);
+
+static void
+lrm_connection_destroy(void)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
+ crm_crit("Connection to executor failed");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+
+ } else {
+ crm_info("Disconnected from executor");
+ }
+}
+
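+// Build the key ("<rsc_id>:<call_id>") used to track a pending operation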
+static char *
+make_stop_id(const char *rsc, int call_id)
+{
+ return crm_strdup_printf("%s:%d", rsc, call_id);
+}
+
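+// GHFunc to copy a user-supplied (non-meta) parameter into another hash table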
+static void
+copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
+{
+ if (strstr(key, CRM_META "_") == NULL) {
+ g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
+ }
+}
+
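+// GHFunc to copy a meta-attribute parameter into another hash table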
+static void
+copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
+{
+ if (strstr(key, CRM_META "_") != NULL) {
+ g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove a recurring operation from a resource's history
+ *
+ * \param[in,out] history Resource history to modify
+ * \param[in] op Operation to remove
+ *
+ * \return TRUE if the operation was found and removed, FALSE otherwise
+ */
+static gboolean
+history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
+{
+ GList *iter;
+
+ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+ lrmd_event_data_t *existing = iter->data;
+
+ if ((op->interval_ms == existing->interval_ms)
+ && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
+ && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
+
+ history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
+ lrmd_free_event(existing);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Free all recurring operations in resource history
+ *
+ * \param[in,out] history Resource history to modify
+ */
+static void
+history_free_recurring_ops(rsc_history_t *history)
+{
+ GList *iter;
+
+ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+ lrmd_free_event(iter->data);
+ }
+ g_list_free(history->recurring_op_list);
+ history->recurring_op_list = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Free resource history
+ *
+ * \param[in,out] data  Resource history (rsc_history_t *) to free
+ */
+void
+history_free(gpointer data)
+{
+ rsc_history_t *history = (rsc_history_t*)data;
+
+ if (history->stop_params) {
+ g_hash_table_destroy(history->stop_params);
+ }
+
+ /* Don't need to free history->rsc.id because it's set to history->id */
+ free(history->rsc.type);
+ free(history->rsc.standard);
+ free(history->rsc.provider);
+
+ lrmd_free_event(history->failed);
+ lrmd_free_event(history->last);
+ free(history->id);
+ history_free_recurring_ops(history);
+ free(history);
+}
+
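+/*!
+ * \internal
+ * \brief Update a resource's cached history with a new operation result
+ *
+ * \param[in,out] lrm_state  Executor state whose history should be updated
+ * \param[in]     rsc        Resource information (for creating new entries)
+ * \param[in]     op         Operation result to record
+ */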
+static void
+update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
+{
+ int target_rc = 0;
+ rsc_history_t *entry = NULL;
+
+ if (op->rsc_deleted) {
+ crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
+ controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
+ NULL, crmd_cib_smart_opt());
+ return;
+ }
+
+ if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) {
+ return;
+ }
+
+ crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
+ if (entry == NULL && rsc) {
+ entry = calloc(1, sizeof(rsc_history_t));
+ entry->id = strdup(op->rsc_id);
+ g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
+
+ entry->rsc.id = entry->id;
+ entry->rsc.type = strdup(rsc->type);
+ entry->rsc.standard = strdup(rsc->standard);
+ pcmk__str_update(&entry->rsc.provider, rsc->provider);
+
+ } else if (entry == NULL) {
+ crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
+ return;
+ }
+
+ entry->last_callid = op->call_id;
+ target_rc = rsc_op_expected_rc(op);
+ if (op->op_status == PCMK_EXEC_CANCELLED) {
+ if (op->interval_ms > 0) {
+ crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
+ op->rsc_id, op->op_type, op->interval_ms);
+ history_remove_recurring_op(entry, op);
+ return;
+ } else {
+ crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
+ op->rsc_id, op->op_type, op->interval_ms, op->rc,
+ op->op_status);
+ }
+
+ } else if (did_rsc_op_fail(op, target_rc)) {
+ /* Store failed monitors here, otherwise the block below will cause them
+ * to be forgotten when a stop happens.
+ */
+ if (entry->failed) {
+ lrmd_free_event(entry->failed);
+ }
+ entry->failed = lrmd_copy_event(op);
+
+ } else if (op->interval_ms == 0) {
+ if (entry->last) {
+ lrmd_free_event(entry->last);
+ }
+ entry->last = lrmd_copy_event(op);
+
+ if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START,
+ CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT,
+ CRMD_ACTION_STATUS, NULL)) {
+ if (entry->stop_params) {
+ g_hash_table_destroy(entry->stop_params);
+ }
+ entry->stop_params = pcmk__strkey_table(free, free);
+
+ g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
+ }
+ }
+
+ if (op->interval_ms > 0) {
+ /* Ensure there are no duplicates */
+ history_remove_recurring_op(entry, op);
+
+ crm_trace("Adding recurring op: " PCMK__OP_FMT,
+ op->rsc_id, op->op_type, op->interval_ms);
+ entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
+
+ } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) {
+ crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
+ g_list_length(entry->recurring_op_list), op->rsc_id,
+ op->op_type, op->interval_ms);
+ history_free_recurring_ops(entry);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Send a direct OK ack for a resource task
+ *
+ * \param[in] lrm_state LRM connection
+ * \param[in] input Input message being ack'ed
+ * \param[in] rsc_id ID of affected resource
+ * \param[in] rsc Affected resource (if available)
+ * \param[in] task Operation task being ack'ed
+ * \param[in] ack_host Name of host to send ack to
+ * \param[in] ack_sys IPC system name to ack
+ */
+static void
+send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
+ const char *rsc_id, const lrmd_rsc_info_t *rsc,
+ const char *task, const char *ack_host, const char *ack_sys)
+{
+ lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
+
+ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
+ lrmd_free_event(op);
+}
+
+static inline const char *
+op_node_name(lrmd_event_data_t *op)
+{
+ return pcmk__s(op->remote_nodename, controld_globals.our_nodename);
+}
+
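+/*!
+ * \internal
+ * \brief Process an event from the executor client library
+ *
+ * \param[in,out] op  Executor event to process
+ */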
+void
+lrm_op_callback(lrmd_event_data_t * op)
+{
+ CRM_CHECK(op != NULL, return);
+ switch (op->type) {
+ case lrmd_event_disconnect:
+ if (op->remote_nodename == NULL) {
+ /* If this is the local executor IPC connection, set the right
+ * bits in the controller when the connection goes down.
+ */
+ lrm_connection_destroy();
+ }
+ break;
+
+ case lrmd_event_exec_complete:
+ {
+ lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
+
+ CRM_ASSERT(lrm_state != NULL);
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+try_local_executor_connect(long long action, fsa_data_t *msg_data,
+ lrm_state_t *lrm_state)
+{
+ int rc = pcmk_rc_ok;
+
+ crm_debug("Connecting to the local executor");
+
+ // If we can connect, great
+ rc = controld_connect_local_executor(lrm_state);
+ if (rc == pcmk_rc_ok) {
+ controld_set_fsa_input_flags(R_LRM_CONNECTED);
+ crm_info("Connection to the local executor established");
+ return;
+ }
+
+ // Otherwise, if we can try again, set a timer to do so
+ if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
+ crm_warn("Failed to connect to the local executor %d time%s "
+ "(%d max): %s", lrm_state->num_lrm_register_fails,
+ pcmk__plural_s(lrm_state->num_lrm_register_fails),
+ MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
+ controld_start_wait_timer();
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+
+ // Otherwise give up
+ crm_err("Failed to connect to the executor the max allowed "
+ "%d time%s: %s", lrm_state->num_lrm_register_fails,
+ pcmk__plural_s(lrm_state->num_lrm_register_fails),
+ pcmk_rc_str(rc));
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+}
+
+/* A_LRM_CONNECT */
+void
+do_lrm_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ /* This only pertains to local executor connections. Remote connections are
+ * handled as resources within the scheduler. Connecting and disconnecting
+ * from remote executor instances is handled differently.
+ */
+
+ lrm_state_t *lrm_state = NULL;
+
+ if (controld_globals.our_nodename == NULL) {
+ return; /* Nothing to do */
+ }
+ lrm_state = lrm_state_find_or_create(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ return;
+ }
+
+ if (action & A_LRM_DISCONNECT) {
+ if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
+ if (action == A_LRM_DISCONNECT) {
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+ }
+
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+ crm_info("Disconnecting from the executor");
+ lrm_state_disconnect(lrm_state);
+ lrm_state_reset_tables(lrm_state, FALSE);
+ crm_notice("Disconnected from the executor");
+ }
+
+ if (action & A_LRM_CONNECT) {
+ try_local_executor_connect(action, msg_data, lrm_state);
+ }
+
+ if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
+ crm_err("Unexpected action %s in %s", fsa_action2string(action),
+ __func__);
+ }
+}
+
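+/*!
+ * \internal
+ * \brief Check for in-flight operations and active resources before stopping
+ *
+ * \param[in,out] lrm_state  Executor state to check
+ * \param[in]     cur_state  Current FSA state
+ * \param[in]     log_level  Log level for messages about remaining activity
+ *
+ * \return TRUE if the executor connection may be closed, or FALSE if the
+ *         caller should wait for pending operations to finish
+ */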
+static gboolean
+lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
+{
+ int counter = 0;
+ gboolean rc = TRUE;
+ const char *when = "lrm disconnect";
+
+ GHashTableIter gIter;
+ const char *key = NULL;
+ rsc_history_t *entry = NULL;
+ active_op_t *pending = NULL;
+
+ crm_debug("Checking for active resources before exit");
+
+ if (cur_state == S_TERMINATE) {
+ log_level = LOG_ERR;
+ when = "shutdown";
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ when = "shutdown... waiting";
+ }
+
+ if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
+ guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ stop_recurring_actions,
+ lrm_state);
+ guint nremaining = g_hash_table_size(lrm_state->active_ops);
+
+ if (removed || nremaining) {
+ crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
+ removed, pcmk__plural_s(removed), when, nremaining);
+ }
+ }
+
+ if (lrm_state->active_ops != NULL) {
+ g_hash_table_iter_init(&gIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
+ /* Ignore recurring actions in the shutdown calculations */
+ if (pending->interval_ms == 0) {
+ counter++;
+ }
+ }
+ }
+
+ if (counter > 0) {
+ do_crm_log(log_level, "%d pending executor operation%s at %s",
+ counter, pcmk__plural_s(counter), when);
+
+ if ((cur_state == S_TERMINATE)
+ || !pcmk_is_set(controld_globals.fsa_input_register,
+ R_SENT_RSC_STOP)) {
+ g_hash_table_iter_init(&gIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
+ do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
+ }
+
+ } else {
+ rc = FALSE;
+ }
+ return rc;
+ }
+
+ if (lrm_state->resource_history == NULL) {
+ return rc;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ /* At this point we're not waiting, we're just shutting down */
+ when = "shutdown";
+ }
+
+ counter = 0;
+ g_hash_table_iter_init(&gIter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
+ if (is_rsc_active(lrm_state, entry->id) == FALSE) {
+ continue;
+ }
+
+ counter++;
+ if (log_level == LOG_ERR) {
+ crm_info("Found %s active at %s", entry->id, when);
+ } else {
+ crm_trace("Found %s active at %s", entry->id, when);
+ }
+ if (lrm_state->active_ops != NULL) {
+ GHashTableIter hIter;
+
+ g_hash_table_iter_init(&hIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
+ if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
+ crm_notice("%sction %s (%s) incomplete at %s",
+ pending->interval_ms == 0 ? "A" : "Recurring a",
+ key, pending->op_key, when);
+ }
+ }
+ }
+ }
+
+ if (counter) {
+ crm_err("%d resource%s active at %s",
+ counter, (counter == 1)? " was" : "s were", when);
+ }
+
+ return rc;
+}
+
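+/*!
+ * \internal
+ * \brief Check whether a resource's cached history suggests it may be active
+ *
+ * \param[in] lrm_state  Executor state containing the resource history
+ * \param[in] rsc_id     ID of resource to check
+ *
+ * \return TRUE if the last recorded operation leaves the resource possibly
+ *         active, otherwise FALSE
+ */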
+static gboolean
+is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
+{
+ rsc_history_t *entry = NULL;
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ if (entry == NULL || entry->last == NULL) {
+ return FALSE;
+ }
+
+ crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
+ entry->last->interval_ms, entry->last->rc);
+ if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) {
+ return FALSE;
+
+ } else if (entry->last->rc == PCMK_OCF_OK
+ && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
+ // A stricter check is too complex ... leave that to the scheduler
+ return FALSE;
+
+ } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
+ return FALSE;
+
+ } else if ((entry->last->interval_ms == 0)
+ && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
+ /* Badly configured resources can't be reliably stopped */
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
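+/*!
+ * \internal
+ * \brief Add an lrm_resource entry (with history) for each cached resource
+ *
+ * \param[in,out] lrm_state  Executor state with resource history to export
+ * \param[in,out] rsc_list   XML list element to add the entries to
+ *
+ * \return FALSE
+ */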
+static gboolean
+build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
+{
+ GHashTableIter iter;
+ rsc_history_t *entry = NULL;
+
+ g_hash_table_iter_init(&iter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
+
+ GList *gIter = NULL;
+ xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
+
+ crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
+ crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
+ crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
+ crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
+
+ if (entry->last && entry->last->params) {
+ const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
+ if (container) {
+ crm_trace("Resource %s is a part of container resource %s", entry->id, container);
+ crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
+ }
+ }
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
+ lrm_state->node_name);
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
+ lrm_state->node_name);
+ for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
+ lrm_state->node_name);
+ }
+ }
+
+ return FALSE;
+}
+
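+/*!
+ * \internal
+ * \brief Build a node state update including the local executor's history
+ *
+ * \return Newly allocated node state XML on success, otherwise NULL
+ */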
+xmlNode *
+controld_query_executor_state(void)
+{
+ xmlNode *xml_state = NULL;
+ xmlNode *xml_data = NULL;
+ xmlNode *rsc_list = NULL;
+ crm_node_t *peer = NULL;
+ lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename);
+
+ if (!lrm_state) {
+ crm_err("Could not find executor state for node %s",
+ controld_globals.our_nodename);
+ return NULL;
+ }
+
+ peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
+ CRM_CHECK(peer != NULL, return NULL);
+
+ xml_state = create_node_state_update(peer,
+ node_update_cluster|node_update_peer,
+ NULL, __func__);
+ if (xml_state == NULL) {
+ return NULL;
+ }
+
+ xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
+ crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
+ rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
+
+ /* Build a list of active (not always running) resources */
+ build_active_RAs(lrm_state, rsc_list);
+
+ crm_log_xml_trace(xml_state, "Current executor state");
+
+ return xml_state;
+}
+
+/*!
+ * \internal
+ * \brief Map standard Pacemaker return code to operation status and OCF code
+ *
+ * \param[out] event Executor event whose status and return code should be set
+ * \param[in] rc Standard Pacemaker return code
+ */
+void
+controld_rc2event(lrmd_event_data_t *event, int rc)
+{
+ /* This is called for cleanup requests from controller peers/clients, not
+ * for resource actions, so no exit reason is needed.
+ */
+ switch (rc) {
+ case pcmk_rc_ok:
+ lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ break;
+ case EACCES:
+ lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
+ PCMK_EXEC_ERROR, NULL);
+ break;
+ default:
+ lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ NULL);
+ break;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Trigger a new transition after CIB status was deleted
+ *
+ * If a CIB status delete was not expected (as part of the transition graph),
+ * trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
+ * cluster property.
+ *
+ * \param[in] from_sys IPC name that requested the delete
+ * \param[in] rsc_id Resource whose status was deleted (for logging only)
+ */
+void
+controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
+{
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
+ char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
+
+ crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_none,
+ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
+ "last-lrm-refresh", now_s, NULL, NULL);
+ free(now_s);
+ }
+}
+
+static void
+notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
+ const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
+
+ crm_info("Notifying %s on %s that %s was%s deleted",
+ from_sys, (from_host? from_host : "localhost"), rsc_id,
+ ((rc == pcmk_ok)? "" : " not"));
+ op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
+ controld_rc2event(op, pcmk_legacy2rc(rc));
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+ lrmd_free_event(op);
+ controld_trigger_delete_refresh(from_sys, rsc_id);
+}
+
+static gboolean
+lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
+{
+ struct delete_event_s *event = user_data;
+ struct pending_deletion_op_s *op = value;
+
+ if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
+ notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static gboolean
+lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
+{
+ const char *rsc = user_data;
+ active_op_t *pending = value;
+
+ if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
+ crm_info("Removing op %s:%d for deleted resource %s",
+ pending->op_key, pending->call_id, rsc);
+ return TRUE;
+ }
+ return FALSE;
+}
+
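+/*!
+ * \internal
+ * \brief Remove a deleted resource from controller bookkeeping
+ *
+ * \param[in,out] lrm_state  Executor state to update
+ * \param[in]     input      Request to ack via notify_deleted() (if not NULL)
+ * \param[in]     rsc_id     ID of resource that was deleted
+ * \param[in,out] rsc_iter   If not NULL, iterator pointing at history entry
+ * \param[in]     rc         Result of the deletion
+ * \param[in]     user_name  ACL user making the request (if any)
+ * \param[in]     from_cib   If true, also erase the resource's CIB history
+ */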
+static void
+delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
+ const char *rsc_id, GHashTableIter *rsc_iter, int rc,
+ const char *user_name, bool from_cib)
+{
+ struct delete_event_s event;
+
+ CRM_CHECK(rsc_id != NULL, return);
+
+ if (rc == pcmk_ok) {
+ char *rsc_id_copy = strdup(rsc_id);
+
+ if (rsc_iter) {
+ g_hash_table_iter_remove(rsc_iter);
+ } else {
+ g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
+ }
+
+ if (from_cib) {
+ controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
+ user_name, crmd_cib_smart_opt());
+ }
+ g_hash_table_foreach_remove(lrm_state->active_ops,
+ lrm_remove_deleted_op, rsc_id_copy);
+ free(rsc_id_copy);
+ }
+
+ if (input) {
+ notify_deleted(lrm_state, input, rsc_id, rc);
+ }
+
+ event.rc = rc;
+ event.rsc = rsc_id;
+ event.lrm_state = lrm_state;
+ g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
+}
+
+static inline gboolean
+last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
+{
+    if ((entry == NULL) || (entry->failed == NULL)) {
+        // No history entry or no recorded failure to match against
+        return FALSE;
+    }
+ if (op == NULL) {
+ return TRUE;
+ }
+ return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
+ && (interval_ms == entry->failed->interval_ms));
+}
+
+/*!
+ * \internal
+ * \brief Clear a resource's last failure
+ *
+ * Erase a resource's last failure on a particular node from both the
+ * LRM resource history in the CIB, and the resource history remembered
+ * for the LRM state.
+ *
+ * \param[in] rsc_id Resource name
+ * \param[in] node_name Node name
+ * \param[in] operation If specified, only clear if matching this operation
+ * \param[in] interval_ms If operation is specified, it has this interval
+ */
+void
+lrm_clear_last_failure(const char *rsc_id, const char *node_name,
+ const char *operation, guint interval_ms)
+{
+ lrm_state_t *lrm_state = lrm_state_find(node_name);
+
+ if (lrm_state == NULL) {
+ return;
+ }
+ if (lrm_state->resource_history != NULL) {
+ rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
+ rsc_id);
+
+ if (last_failed_matches_op(entry, operation, interval_ms)) {
+ lrmd_free_event(entry->failed);
+ entry->failed = NULL;
+ }
+ }
+}
+
+/* Returns: gboolean - cancellation is in progress */
+static gboolean
+cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
+{
+ int rc = pcmk_ok;
+ char *local_key = NULL;
+ active_op_t *pending = NULL;
+
+ CRM_CHECK(op != 0, return FALSE);
+ CRM_CHECK(rsc_id != NULL, return FALSE);
+ if (key == NULL) {
+ local_key = make_stop_id(rsc_id, op);
+ key = local_key;
+ }
+ pending = g_hash_table_lookup(lrm_state->active_ops, key);
+
+ if (pending) {
+ if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
+ controld_set_active_op_flags(pending, active_op_remove);
+ crm_debug("Scheduling %s for removal", key);
+ }
+
+ if (pcmk_is_set(pending->flags, active_op_cancelled)) {
+ crm_debug("Operation %s already cancelled", key);
+ free(local_key);
+ return FALSE;
+ }
+ controld_set_active_op_flags(pending, active_op_cancelled);
+
+ } else {
+ crm_info("No pending op found for %s", key);
+ free(local_key);
+ return FALSE;
+ }
+
+ crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
+ rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
+ pending->interval_ms);
+ if (rc == pcmk_ok) {
+ crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
+ free(local_key);
+ return TRUE;
+ }
+
+ crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
+    /* The caller must make sure the entry is removed from the active
+     * operations list, usually by returning TRUE from a worker function
+     * supplied to g_hash_table_foreach_remove(). Leaving the entry in place
+     * would block the node from shutting down.
+     */
+ free(local_key);
+ return FALSE;
+}
+
+struct cancel_data {
+ gboolean done;
+ gboolean remove;
+ const char *key;
+ lrmd_rsc_info_t *rsc;
+ lrm_state_t *lrm_state;
+};
+
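+// GHRFunc to cancel a matching active operation (entry is removed if no
+// cancellation is left pending)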
+static gboolean
+cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ struct cancel_data *data = user_data;
+ active_op_t *op = value;
+
+ if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
+ data->done = TRUE;
+ remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
+ }
+ return remove;
+}
+
+static gboolean
+cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
+{
+ guint removed = 0;
+ struct cancel_data data;
+
+ CRM_CHECK(rsc != NULL, return FALSE);
+ CRM_CHECK(key != NULL, return FALSE);
+
+ data.key = key;
+ data.rsc = rsc;
+ data.done = FALSE;
+ data.remove = remove;
+ data.lrm_state = lrm_state;
+
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ cancel_action_by_key, &data);
+ crm_trace("Removed %u op cache entries, new size: %u",
+ removed, g_hash_table_size(lrm_state->active_ops));
+ return data.done;
+}
+
+/*!
+ * \internal
+ * \brief Retrieve resource information from LRM
+ *
+ * \param[in,out] lrm_state Executor connection state to use
+ * \param[in] rsc_xml XML containing resource configuration
+ * \param[in] do_create If true, register resource if not already
+ * \param[out] rsc_info Where to store information obtained from executor
+ *
+ * \retval pcmk_ok Success (and rsc_info holds newly allocated result)
+ * \retval -EINVAL Required information is missing from arguments
+ * \retval -ENOTCONN No active connection to LRM
+ * \retval -ENODEV Resource not found
+ * \retval -errno Error communicating with executor when registering resource
+ *
+ * \note Caller is responsible for freeing result on success.
+ */
+static int
+get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
+ gboolean do_create, lrmd_rsc_info_t **rsc_info)
+{
+ const char *id = ID(rsc_xml);
+
+ CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
+ CRM_CHECK(id, return -EINVAL);
+
+ if (lrm_state_is_connected(lrm_state) == FALSE) {
+ return -ENOTCONN;
+ }
+
+ crm_trace("Retrieving resource information for %s from the executor", id);
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+
+ // If resource isn't known by ID, try clone name, if provided
+ if (!*rsc_info) {
+ const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
+
+ if (long_id) {
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
+ }
+ }
+
+ if ((*rsc_info == NULL) && do_create) {
+ const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
+ const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
+ const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
+ int rc;
+
+ crm_trace("Registering resource %s with the executor", id);
+ rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
+ lrmd_opt_drop_recurring);
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Could not register resource %s with the executor on %s: %s "
+ CRM_XS " rc=%d",
+ id, lrm_state->node_name, pcmk_strerror(rc), rc);
+
+ /* Register this as an internal error if this involves the local
+ * executor. Otherwise, we're likely dealing with an unresponsive
+ * remote node, which is not an FSA failure.
+ */
+ if (lrm_state_is_local(lrm_state) == TRUE) {
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ return rc;
+ }
+
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+ }
+ return *rsc_info? pcmk_ok : -ENODEV;
+}
+
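+/*!
+ * \internal
+ * \brief Unregister a resource from the executor and delete its history
+ *
+ * \param[in,out] lrm_state   Executor state containing the resource
+ * \param[in]     id          ID of resource to delete
+ * \param[in]     rsc         Resource information (if available)
+ * \param[in,out] iter        If not NULL, iterator pointing at history entry
+ * \param[in]     sys         IPC name of requester (for logging)
+ * \param[in]     user        ACL user making the request (if any)
+ * \param[in,out] request     Request message (if available)
+ * \param[in]     unregister  If true, unregister the resource with the executor
+ * \param[in]     from_cib    If true, also erase the resource's CIB history
+ */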
+static void
+delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
+ GHashTableIter *iter, const char *sys, const char *user,
+ ha_msg_input_t *request, bool unregister, bool from_cib)
+{
+ int rc = pcmk_ok;
+
+ crm_info("Removing resource %s from executor for %s%s%s",
+ id, sys, (user? " as " : ""), (user? user : ""));
+
+ if (rsc && unregister) {
+ rc = lrm_state_unregister_rsc(lrm_state, id, 0);
+ }
+
+ if (rc == pcmk_ok) {
+ crm_trace("Resource %s deleted from executor", id);
+ } else if (rc == -EINPROGRESS) {
+ crm_info("Deletion of resource '%s' from executor is pending", id);
+ if (request) {
+ struct pending_deletion_op_s *op = NULL;
+ char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
+
+ op = calloc(1, sizeof(struct pending_deletion_op_s));
+ op->rsc = strdup(rsc->id);
+ op->input = copy_ha_msg_input(request);
+ g_hash_table_insert(lrm_state->deletion_ops, ref, op);
+ }
+ return;
+ } else {
+ crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
+ CRM_XS " rc=%d", id, sys, (user? " as " : ""),
+ (user? user : ""), pcmk_strerror(rc), rc);
+ }
+
+ delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
+}
+
+static int
+get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
+{
+ int call_id = 999999999;
+ rsc_history_t *entry = NULL;
+
+    if (lrm_state) {
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ }
+
+    /* Make sure the call ID is greater than the last successful operation's.
+     * Otherwise, the failure could not trigger recovery of the resource,
+     * because it would appear to have occurred before the successful start.
+     */
+ if (entry) {
+ call_id = entry->last_callid + 1;
+ }
+
+ if (call_id < 0) {
+ call_id = 1;
+ }
+ return call_id;
+}
+
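+// Synthesize a result for an operation that will not actually be executed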
+static void
+fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
+ enum ocf_exitcode op_exitcode, const char *exit_reason)
+{
+ op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
+ op->t_run = time(NULL);
+ op->t_rcchange = op->t_run;
+ lrmd__set_result(op, op_exitcode, op_status, exit_reason);
+}
+
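+/*!
+ * \internal
+ * \brief Purge all resource history so the next transition reprobes everything
+ *
+ * \param[in,out] lrm_state          Executor state whose history to purge
+ * \param[in]     from_sys           IPC name of requester
+ * \param[in]     from_host          Name of requesting host
+ * \param[in]     user_name          ACL user making the request (if any)
+ * \param[in]     is_remote_node     Whether this is for a Pacemaker Remote node
+ * \param[in]     reprobe_all_nodes  Whether connected remote nodes should also
+ *                                   be reprobed
+ */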
+static void
+force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
+ const char *from_host, const char *user_name,
+ gboolean is_remote_node, bool reprobe_all_nodes)
+{
+ GHashTableIter gIter;
+ rsc_history_t *entry = NULL;
+
+ crm_info("Clearing resource history on node %s", lrm_state->node_name);
+ g_hash_table_iter_init(&gIter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+        /* Only unregister the resource during a reprobe if it is not a remote
+         * connection resource. Otherwise, unregistering the connection would
+         * terminate remote node membership.
+         */
+ bool unregister = true;
+
+ if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
+ unregister = false;
+
+ if (reprobe_all_nodes) {
+ lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
+
+ if (remote_lrm_state != NULL) {
+ /* If reprobing all nodes, be sure to reprobe the remote
+ * node before clearing its connection resource
+ */
+ force_reprobe(remote_lrm_state, from_sys, from_host,
+ user_name, TRUE, reprobe_all_nodes);
+ }
+ }
+ }
+
+ /* Don't delete from the CIB, since we'll delete the whole node's LRM
+ * state from the CIB soon
+ */
+ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
+ user_name, NULL, unregister, false);
+ }
+
+ /* Now delete the copy in the CIB */
+ controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
+ cib_scope_local);
+
+ // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false)
+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
+}
+
+/*!
+ * \internal
+ * \brief Fail a requested action without actually executing it
+ *
+ * For an action that can't be executed, process it similarly to an actual
+ * execution result, with specified error status (except for notify actions,
+ * which will always be treated as successful).
+ *
+ * \param[in,out] lrm_state Executor connection that action is for
+ * \param[in] action Action XML from request
+ * \param[in]     op_status  Desired operation status to use
+ * \param[in]     rc         Desired return code to use
+ * \param[in] exit_reason Human-friendly detail, if error
+ */
+static void
+synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
+ int op_status, enum ocf_exitcode rc,
+ const char *exit_reason)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
+ const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
+ xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
+
+ if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
+ /* @TODO Should we do something else, like direct ack? */
+ crm_info("Can't fake %s failure (%d) on %s without resource configuration",
+ crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
+ target_node);
+ return;
+
+    } else if (operation == NULL) {
+ /* This probably came from crm_resource -C, nothing to do */
+ crm_info("Can't fake %s failure (%d) on %s without operation",
+ ID(xml_rsc), rc, target_node);
+ return;
+ }
+
+ op = construct_op(lrm_state, action, ID(xml_rsc), operation);
+
+ if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail
+ fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
+ } else {
+ fake_op_status(lrm_state, op, op_status, rc, exit_reason);
+ }
+
+ crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
+ op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
+
+ // Process the result as if it came from the LRM
+ process_lrm_event(lrm_state, op, NULL, action);
+ lrmd_free_event(op);
+}
+
+/*!
+ * \internal
+ * \brief Get target of an LRM operation (replacing \p NULL with local node
+ * name)
+ *
+ * \param[in] xml LRM operation data XML
+ *
+ * \return LRM operation target node name (local node or Pacemaker Remote node)
+ */
+static const char *
+lrm_op_target(const xmlNode *xml)
+{
+ const char *target = NULL;
+
+ if (xml) {
+ target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
+ }
+ if (target == NULL) {
+ target = controld_globals.our_nodename;
+ }
+ return target;
+}
+
+static void
+fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
+ const char *from_host, const char *from_sys)
+{
+ lrmd_event_data_t *op = NULL;
+ lrmd_rsc_info_t *rsc = NULL;
+ xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
+
+ CRM_CHECK(xml_rsc != NULL, return);
+
+ /* The executor simply executes operations and reports the results, without
+ * any concept of success or failure, so to fail a resource, we must fake
+ * what a failure looks like.
+ *
+ * To do this, we create a fake executor operation event for the resource,
+ * and pass that event to the executor client callback so it will be
+ * processed as if it came from the executor.
+ */
+ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
+
+ free((char*) op->user_data);
+ op->user_data = NULL;
+ op->interval_ms = 0;
+
+ if (user_name && !pcmk__is_privileged(user_name)) {
+ crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
+ fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
+ PCMK_OCF_INSUFFICIENT_PRIV,
+ "Unprivileged user cannot fail resources");
+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
+ lrmd_free_event(op);
+ return;
+ }
+
+ if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
+ crm_info("Failing resource %s...", rsc->id);
+ fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
+ "Simulated failure");
+ process_lrm_event(lrm_state, op, NULL, xml);
+ op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
+ lrmd_free_rsc_info(rsc);
+
+ } else {
+ crm_info("Cannot find/create resource in order to fail it...");
+ crm_log_xml_warn(xml, "bad input");
+ fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
+ "Cannot fail unknown resource");
+ }
+
+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
+ lrmd_free_event(op);
+}
+
+static void
+handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
+ const char *from_host, const char *user_name,
+ gboolean is_remote_node, bool reprobe_all_nodes)
+{
+ crm_notice("Forcing the status of all resources to be redetected");
+ force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
+ reprobe_all_nodes);
+
+ if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
+
+ xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
+ from_sys, CRM_SYSTEM_LRMD,
+ controld_globals.our_uuid);
+
+ crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
+
+ if (relay_message(reply, TRUE) == FALSE) {
+ crm_log_xml_err(reply, "Unable to route reply");
+ }
+ free_xml(reply);
+ }
+}
+
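+/*!
+ * \internal
+ * \brief Handle a cancellation request for a recurring operation
+ *
+ * \param[in]     input      Request message
+ * \param[in,out] lrm_state  Executor state the operation is registered with
+ * \param[in]     rsc        Resource the operation is for
+ * \param[in]     from_host  Name of requesting host
+ * \param[in]     from_sys   IPC name of requester
+ *
+ * \return TRUE if the request was processed, FALSE if it was malformed
+ */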
+static bool
+do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
+              lrmd_rsc_info_t *rsc, const char *from_host,
+              const char *from_sys)
+{
+ char *op_key = NULL;
+ char *meta_key = NULL;
+ int call = 0;
+ const char *call_id = NULL;
+ const char *op_task = NULL;
+ guint interval_ms = 0;
+ gboolean in_progress = FALSE;
+ xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
+
+ CRM_CHECK(params != NULL, return FALSE);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
+ op_task = crm_element_value(params, meta_key);
+ free(meta_key);
+ CRM_CHECK(op_task != NULL, return FALSE);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
+ if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
+ free(meta_key);
+ return FALSE;
+ }
+ free(meta_key);
+
+ op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
+ call_id = crm_element_value(params, meta_key);
+ free(meta_key);
+
+ crm_debug("Scheduler requested op %s (call=%s) be cancelled",
+ op_key, (call_id? call_id : "NA"));
+ pcmk__scan_min_int(call_id, &call, 0);
+ if (call == 0) {
+ // Normal case when the scheduler cancels a recurring op
+ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
+
+ } else {
+ // Normal case when the scheduler cancels an orphan op
+ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
+ }
+
+ // Acknowledge cancellation operation if for a remote connection resource
+ if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+ char *op_id = make_stop_id(rsc->id, call);
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
+ crm_info("Nothing known about operation %d for %s", call, op_key);
+ }
+ controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
+ op_key, call);
+ send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
+ from_host, from_sys);
+
+ /* needed at least for cancellation of a remote operation */
+ if (lrm_state->active_ops != NULL) {
+ g_hash_table_remove(lrm_state->active_ops, op_id);
+ }
+ free(op_id);
+
+ } else {
+ /* No ack is needed since abcdaa8, but peers with older versions
+ * in a rolling upgrade need one. We didn't bump the feature set
+ * at that commit, so we can only compare against the previous
+ * CRM version (3.0.8). If any peers have feature set 3.0.9 but
+ * not abcdaa8, they will time out waiting for the ack (no
+ * released versions of Pacemaker are affected).
+ */
+ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
+
+ if (compare_version(peer_version, "3.0.8") <= 0) {
+ crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
+ op_key, from_host, peer_version);
+ send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
+ from_host, from_sys);
+ }
+ }
+
+ free(op_key);
+ return TRUE;
+}
+
+static void
+do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
+ lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
+ bool crm_rsc_delete, const char *user_name)
+{
+ bool unregister = true;
+ int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
+ user_name,
+ cib_dryrun|cib_sync_call);
+
+ if (cib_rc != pcmk_rc_ok) {
+ lrmd_event_data_t *op = NULL;
+
+ op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
+
+ /* These are resource clean-ups, not actions, so no exit reason is
+ * needed.
+ */
+ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
+ lrmd_free_event(op);
+ return;
+ }
+
+ if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+ unregister = false;
+ }
+
+ delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
+ user_name, input, unregister, true);
+}
+
+// User data for asynchronous metadata execution
+struct metadata_cb_data {
+ lrmd_rsc_info_t *rsc; // Copy of resource information
+ xmlNode *input_xml; // Copy of FSA input XML
+};
+
+static struct metadata_cb_data *
+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
+{
+ struct metadata_cb_data *data = NULL;
+
+ data = calloc(1, sizeof(struct metadata_cb_data));
+ CRM_ASSERT(data != NULL);
+ data->input_xml = copy_xml(input_xml);
+ data->rsc = lrmd_copy_rsc_info(rsc);
+ return data;
+}
+
+static void
+free_metadata_cb_data(struct metadata_cb_data *data)
+{
+ lrmd_free_rsc_info(data->rsc);
+ free_xml(data->input_xml);
+ free(data);
+}
+
+/*!
+ * \internal
+ * \brief Execute an action after metadata has been retrieved
+ *
+ * \param[in] pid Ignored
+ * \param[in] result Result of metadata action
+ * \param[in] user_data Metadata callback data
+ */
+static void
+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
+{
+ struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
+
+ struct ra_metadata_s *md = NULL;
+ lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
+
+ if ((lrm_state != NULL) && pcmk__result_ok(result)) {
+ md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
+ result->action_stdout);
+ }
+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
+ free_metadata_cb_data(data);
+}
+
+/* A_LRM_INVOKE */
+void
+do_lrm_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ lrm_state_t *lrm_state = NULL;
+ const char *crm_op = NULL;
+ const char *from_sys = NULL;
+ const char *from_host = NULL;
+ const char *operation = NULL;
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *user_name = NULL;
+ const char *target_node = lrm_op_target(input->xml);
+ gboolean is_remote_node = FALSE;
+ bool crm_rsc_delete = FALSE;
+
+ // Message routed to the local node is targeting a specific, non-local node
+ is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename,
+ pcmk__str_casei);
+
+ lrm_state = lrm_state_find(target_node);
+ if ((lrm_state == NULL) && is_remote_node) {
+ crm_err("Failing action because local node has never had connection to remote node %s",
+ target_node);
+ synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "Local node has no connection to remote");
+ return;
+ }
+ CRM_ASSERT(lrm_state != NULL);
+
+ user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL);
+ crm_op = crm_element_value(input->msg, F_CRM_TASK);
+ from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
+ }
+
+ if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) {
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ crm_rsc_delete = TRUE; // from crm_resource
+ }
+ operation = CRMD_ACTION_DELETE;
+
+ } else if (input->xml != NULL) {
+ operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
+ }
+
+ CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
+
+ crm_trace("'%s' execution request from %s as %s user",
+ pcmk__s(crm_op, operation),
+ pcmk__s(from_sys, "unknown subsystem"),
+ pcmk__s(user_name, "current"));
+
+ if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
+ fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
+ from_sys);
+
+ } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) {
+ /* @COMPAT This can only be sent by crm_resource --refresh on a
+ * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
+ * unlikely. It previously would cause the controller to re-write its
+ * resource history to the CIB. Just ignore it.
+ */
+ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
+
+ // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
+ } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) {
+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
+ user_name, is_remote_node);
+
+ } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
+ || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
+ const char *raw_target = NULL;
+
+ if (input->xml != NULL) {
+ // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
+ raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
+ }
+ handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
+ is_remote_node, (raw_target == NULL));
+
+ } else if (operation != NULL) {
+ lrmd_rsc_info_t *rsc = NULL;
+ xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
+ gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE,
+ pcmk__str_none);
+ int rc;
+
+ // We can't return anything meaningful without a resource ID
+ CRM_CHECK(xml_rsc && ID(xml_rsc), return);
+
+ rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
+ if (rc == -ENOTCONN) {
+ synthesize_lrmd_failure(lrm_state, input->xml,
+ PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "Not connected to remote executor");
+ return;
+
+ } else if ((rc < 0) && !create_rsc) {
+ /* Delete of malformed or nonexistent resource
+ * (deleting something that does not exist is a success)
+ */
+ crm_notice("Not registering resource '%s' for a %s event "
+ CRM_XS " get-rc=%d (%s) transition-key=%s",
+ ID(xml_rsc), operation,
+ rc, pcmk_strerror(rc), ID(input->xml));
+ delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
+ user_name, true);
+ return;
+
+ } else if (rc == -EINVAL) {
+ // Resource operation on malformed resource
+ crm_err("Invalid resource definition for %s", ID(xml_rsc));
+ crm_log_xml_warn(input->msg, "invalid resource");
+ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
+ PCMK_OCF_NOT_CONFIGURED, // fatal error
+ "Invalid resource definition");
+ return;
+
+ } else if (rc < 0) {
+ // Error communicating with the executor
+ crm_err("Could not register resource '%s' with executor: %s "
+ CRM_XS " rc=%d",
+ ID(xml_rsc), pcmk_strerror(rc), rc);
+ crm_log_xml_warn(input->msg, "failed registration");
+ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
+ PCMK_OCF_INVALID_PARAM, // hard error
+ "Could not register resource with executor");
+ return;
+ }
+
+ if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) {
+ if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
+ crm_log_xml_warn(input->xml, "Bad command");
+ }
+
+ } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) {
+ do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
+ crm_rsc_delete, user_name);
+
+ } else {
+ struct ra_metadata_s *md = NULL;
+
+ /* Getting metadata from cache is OK except for start actions --
+ * always refresh from the agent for those, in case the resource
+ * agent was updated.
+ *
+ * @TODO Only refresh metadata for starts if the agent actually
+ * changed (using something like inotify, or a hash or modification
+ * time of the agent executable).
+ */
+ if (strcmp(operation, CRMD_ACTION_START) != 0) {
+ md = controld_get_rsc_metadata(lrm_state, rsc,
+ controld_metadata_from_cache);
+ }
+
+ if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
+ operation)) {
+ /* Most likely, we'll need the agent metadata to record the
+ * pending operation and the operation result. Get it now rather
+ * than wait until then, so the metadata action doesn't eat into
+ * the real action's timeout.
+ *
+ * @TODO Metadata is retrieved via direct execution of the
+ * agent, which has a couple of related issues: the executor
+ * should execute agents, not the controller; and metadata for
+ * Pacemaker Remote nodes should be collected on those nodes,
+ * not locally.
+ */
+ struct metadata_cb_data *data = NULL;
+
+ data = new_metadata_cb_data(rsc, input->xml);
+ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ (void) lrmd__metadata_async(rsc, metadata_complete,
+ (void *) data);
+ } else {
+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
+ }
+ }
+
+ lrmd_free_rsc_info(rsc);
+
+ } else {
+ crm_err("Invalid execution request: unknown command '%s' (bug?)",
+ crm_op);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
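+/*!
+ * \internal
+ * \brief Construct an executor operation from a resource operation request
+ *
+ * \param[in] lrm_state  Executor state (for looking up cached stop parameters)
+ * \param[in] rsc_op     Resource operation XML from request (may be NULL for
+ *                       the stop-all-resources case)
+ * \param[in] rsc_id     ID of resource the operation is for
+ * \param[in] operation  Name of operation to construct
+ *
+ * \return Newly allocated executor operation
+ */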
+static lrmd_event_data_t *
+construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
+ const char *rsc_id, const char *operation)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *op_delay = NULL;
+ const char *op_timeout = NULL;
+ GHashTable *params = NULL;
+
+ xmlNode *primitive = NULL;
+ const char *class = NULL;
+
+ const char *transition = NULL;
+
+ CRM_ASSERT(rsc_id && operation);
+
+ op = lrmd_new_event(rsc_id, operation, 0);
+ op->type = lrmd_event_exec_complete;
+ op->timeout = 0;
+ op->start_delay = 0;
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+ if (rsc_op == NULL) {
+ CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei));
+ op->user_data = NULL;
+        /* This is the stop_all_resources() case: by definition there is no DC
+         * (otherwise it would be shutting us down), so use our own feature set
+         * as the CRM version.
+         */
+ op->params = pcmk__strkey_table(free, free);
+
+ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
+
+ crm_trace("Constructed %s op for %s", operation, rsc_id);
+ return op;
+ }
+
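+    /* Extract the operation's parameters and meta-attributes from the request
+     * XML, dropping the expected-result meta-attribute, which is of interest
+     * only to the controller, not the agent
+     */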
+ params = xml2list(rsc_op);
+ g_hash_table_remove(params, CRM_META "_op_target_rc");
+
+ op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
+ pcmk__scan_min_int(op_delay, &op->start_delay, 0);
+
+ op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
+ pcmk__scan_min_int(op_timeout, &op->timeout, 0);
+
+ if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0,
+ &(op->interval_ms)) != pcmk_rc_ok) {
+ op->interval_ms = 0;
+ }
+
+ /* Use pcmk_monitor_timeout instead of meta timeout for stonith
+ recurring monitor, if set */
+ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE);
+ class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS);
+
+ if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
+ && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei)
+ && (op->interval_ms > 0)) {
+
+ op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
+ if (op_timeout != NULL) {
+ op->timeout = crm_get_msec(op_timeout);
+ }
+ }
+
+ if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) {
+ op->params = params;
+
+ } else {
+ rsc_history_t *entry = NULL;
+
+ if (lrm_state) {
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ }
+
+ /* If we do not have stop parameters cached, use
+ * whatever we are given */
+ if (!entry || !entry->stop_params) {
+ op->params = params;
+ } else {
+ /* Copy the cached parameter list so that we stop the resource
+ * with the old attributes, not the new ones */
+ op->params = pcmk__strkey_table(free, free);
+
+ g_hash_table_foreach(params, copy_meta_keys, op->params);
+ g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
+ g_hash_table_destroy(params);
+ params = NULL;
+ }
+ }
+
+    /* Sanity-check the parsed values */
+ if (op->timeout <= 0) {
+ op->timeout = op->interval_ms;
+ }
+ if (op->start_delay < 0) {
+ op->start_delay = 0;
+ }
+
+ transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
+ CRM_CHECK(transition != NULL, return op);
+
+ op->user_data = strdup(transition);
+
+ if (op->interval_ms != 0) {
+ if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) {
+ crm_err("Start and Stop actions cannot have an interval: %u",
+ op->interval_ms);
+ op->interval_ms = 0;
+ }
+ }
+
+ crm_trace("Constructed %s op for %s: interval=%u",
+ operation, rsc_id, op->interval_ms);
+
+ return op;
+}
+
+/*!
+ * \internal
+ * \brief Send a (synthesized) event result
+ *
+ * Reply with a synthesized event result directly, as opposed to going through
+ * the executor.
+ *
+ * \param[in] to_host Host to send result to
+ * \param[in]     to_sys  IPC name to send result to (NULL for transition engine)
+ * \param[in] rsc Type information about resource the result is for
+ * \param[in,out] op Event with result to send
+ * \param[in] rsc_id ID of resource the result is for
+ */
+void
+controld_ack_event_directly(const char *to_host, const char *to_sys,
+ const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
+ const char *rsc_id)
+{
+ xmlNode *reply = NULL;
+ xmlNode *update, *iter;
+ crm_node_t *peer = NULL;
+
+ CRM_CHECK(op != NULL, return);
+ if (op->rsc_id == NULL) {
+ CRM_ASSERT(rsc_id != NULL);
+ op->rsc_id = strdup(rsc_id);
+ }
+ if (to_sys == NULL) {
+ to_sys = CRM_SYSTEM_TENGINE;
+ }
+
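+    // Build a node_state fragment that wraps the synthesized result as if it
+    // were recorded resource history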
+ peer = crm_get_peer(0, controld_globals.our_nodename);
+ update = create_node_state_update(peer, node_update_none, NULL,
+ __func__);
+
+ iter = create_xml_node(update, XML_CIB_TAG_LRM);
+ crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid);
+ iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
+ iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
+
+ crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
+
+ controld_add_resource_history_xml(iter, rsc, op,
+ controld_globals.our_nodename);
+ reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
+
+ crm_log_xml_trace(update, "[direct ACK]");
+
+ crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
+ op->rsc_id, op->op_type, op->interval_ms, op->user_data,
+ crm_element_value(reply, XML_ATTR_REFERENCE));
+
+ if (relay_message(reply, TRUE) == FALSE) {
+ crm_log_xml_err(reply, "Unable to route reply");
+ }
+
+ free_xml(update);
+ free_xml(reply);
+}
+
+gboolean
+verify_stopped(enum crmd_fsa_state cur_state, int log_level)
+{
+ gboolean res = TRUE;
+ GList *lrm_state_list = lrm_state_get_list();
+ GList *state_entry;
+
+ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
+ lrm_state_t *lrm_state = state_entry->data;
+
+ if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
+            /* keep iterating through all states even when FALSE is returned */
+ res = FALSE;
+ }
+ }
+
+ controld_set_fsa_input_flags(R_SENT_RSC_STOP);
+ g_list_free(lrm_state_list); lrm_state_list = NULL;
+ return res;
+}
+
+struct stop_recurring_action_s {
+ lrmd_rsc_info_t *rsc;
+ lrm_state_t *lrm_state;
+};
+
+static gboolean
+stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ struct stop_recurring_action_s *event = user_data;
+ active_op_t *op = value;
+
+ if ((op->interval_ms != 0)
+ && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
+
+ crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
+ remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
+ }
+
+ return remove;
+}
+
+static gboolean
+stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ lrm_state_t *lrm_state = user_data;
+ active_op_t *op = value;
+
+ if (op->interval_ms != 0) {
+ crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
+ (const char *) key);
+ remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
+ }
+
+ return remove;
+}
+
+/*!
+ * \internal
+ * \brief Check whether recurring actions should be cancelled before an action
+ *
+ * \param[in] rsc_id Resource that action is for
+ * \param[in] action Action being performed
+ * \param[in] interval_ms Operation interval of \p action (in milliseconds)
+ *
+ * \return true if recurring actions should be cancelled, otherwise false
+ */
+static bool
+should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
+{
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
+ && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) {
+ /* Don't stop monitoring a migrating Pacemaker Remote connection
+ * resource until the entire migration has completed. We must detect if
+ * the connection is unexpectedly severed, even during a migration.
+ */
+ return false;
+ }
+
+ // Cancel recurring actions before changing resource state
+ return (interval_ms == 0)
+ && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY,
+ NULL);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action should not be performed at this time
+ *
+ * \param[in] operation Action to be performed
+ *
+ * \return Readable description of why action should not be performed,
+ * or NULL if it should be performed
+ */
+static const char *
+should_nack_action(const char *action)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
+ && pcmk__str_eq(action, RSC_START, pcmk__str_none)) {
+
+ register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+ return "Not attempting start due to shutdown in progress";
+ }
+
+ switch (controld_globals.fsa_state) {
+ case S_NOT_DC:
+ case S_POLICY_ENGINE: // Recalculating
+ case S_TRANSITION_ENGINE:
+ break;
+ default:
+ if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) {
+ return "Controller cannot attempt actions at this time";
+ }
+ break;
+ }
+ return NULL;
+}
+
+static void
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
+ struct ra_metadata_s *md)
+{
+ int rc;
+ int call_id = 0;
+ char *op_id = NULL;
+ lrmd_event_data_t *op = NULL;
+ fsa_data_t *msg_data = NULL;
+ const char *transition = NULL;
+ const char *operation = NULL;
+ const char *nack_reason = NULL;
+
+ CRM_CHECK((rsc != NULL) && (msg != NULL), return);
+
+ operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
+ CRM_CHECK(!pcmk__str_empty(operation), return);
+
+ transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
+ if (pcmk__str_empty(transition)) {
+ crm_log_xml_err(msg, "Missing transition number");
+ }
+
+ if (lrm_state == NULL) {
+ // This shouldn't be possible, but provide a failsafe just in case
+ crm_err("Cannot execute %s of %s: No executor connection "
+ CRM_XS " transition_key=%s",
+ operation, rsc->id, pcmk__s(transition, ""));
+ synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "No executor connection");
+ return;
+ }
+
+ if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT, NULL)) {
+ /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
+ * will schedule reload-agent actions only. In either case, we need
+ * to map that to whatever the resource agent actually supports.
+ * Default to the OCF 1.1 name.
+ */
+ if ((md != NULL)
+ && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
+ operation = CRMD_ACTION_RELOAD;
+ } else {
+ operation = CRMD_ACTION_RELOAD_AGENT;
+ }
+ }
+
+ op = construct_op(lrm_state, msg, rsc->id, operation);
+ CRM_CHECK(op != NULL, return);
+
+ if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
+ guint removed = 0;
+ struct stop_recurring_action_s data;
+
+ data.rsc = rsc;
+ data.lrm_state = lrm_state;
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ stop_recurring_action_by_rsc,
+ &data);
+
+ if (removed) {
+ crm_debug("Stopped %u recurring operation%s in preparation for "
+ PCMK__OP_FMT, removed, pcmk__plural_s(removed),
+ rsc->id, operation, op->interval_ms);
+ }
+ }
+
+ /* now do the op */
+ crm_notice("Requesting local execution of %s operation for %s on %s "
+ CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
+ crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
+ pcmk__s(transition, ""), rsc->id, operation, op->interval_ms);
+
+ nack_reason = should_nack_action(operation);
+ if (nack_reason != NULL) {
+ crm_notice("Discarding attempt to perform action %s on %s in state %s "
+ "(shutdown=%s)", operation, rsc->id,
+ fsa_state2string(controld_globals.fsa_state),
+ pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN)));
+
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
+ nack_reason);
+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
+ lrmd_free_event(op);
+ free(op_id);
+ return;
+ }
+
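+    // Record the action as pending in the CIB before actually initiating it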
+ controld_record_pending_op(lrm_state->node_name, rsc, op);
+
+ op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
+
+ if (op->interval_ms > 0) {
+ /* cancel it so we can then restart it without conflict */
+ cancel_op_key(lrm_state, rsc, op_id, FALSE);
+ }
+
+ rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
+ op->user_data, op->interval_ms,
+ op->timeout, op->start_delay,
+ op->params, &call_id);
+ if (rc == pcmk_rc_ok) {
+ /* record all operations so we can wait
+ * for them to complete during shutdown
+ */
+ char *call_id_s = make_stop_id(rsc->id, call_id);
+ active_op_t *pending = NULL;
+
+ pending = calloc(1, sizeof(active_op_t));
+ crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
+
+ pending->call_id = call_id;
+ pending->interval_ms = op->interval_ms;
+ pending->op_type = strdup(operation);
+ pending->op_key = strdup(op_id);
+ pending->rsc_id = strdup(rsc->id);
+ pending->start_time = time(NULL);
+ pcmk__str_update(&pending->user_data, op->user_data);
+ if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ &(pending->lock_time)) != pcmk_ok) {
+ pending->lock_time = 0;
+ }
+ g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
+
+ if ((op->interval_ms > 0)
+ && (op->start_delay > START_DELAY_THRESHOLD)) {
+ int target_rc = PCMK_OCF_OK;
+
+ crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
+ decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
+ lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
+ }
+
+ pending->params = op->params;
+ op->params = NULL;
+
+ } else if (lrm_state_is_local(lrm_state)) {
+ crm_err("Could not initiate %s action for resource %s locally: %s "
+ CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
+ fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+
+ } else {
+ crm_err("Could not initiate %s action for resource %s remotely on %s: "
+ "%s " CRM_XS " rc=%d",
+ operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
+ fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ }
+
+ free(op_id);
+ lrmd_free_event(op);
+}
+
+void
+do_lrm_event(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
+{
+ CRM_CHECK(FALSE, return);
+}
+
+static char *
+unescape_newlines(const char *string)
+{
+ char *pch = NULL;
+ char *ret = NULL;
+ static const char *escaped_newline = "\\n";
+
+ if (!string) {
+ return NULL;
+ }
+
+ ret = strdup(string);
+ pch = strstr(ret, escaped_newline);
+ while (pch != NULL) {
+ /* Replace newline escape pattern with actual newline (and a space so we
+ * don't have to shuffle the rest of the buffer)
+ */
+ pch[0] = '\n';
+ pch[1] = ' ';
+ pch = strstr(pch, escaped_newline);
+ }
+
+ return ret;
+}
+
+static bool
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
+ const char * op_type, guint interval_ms)
+{
+ rsc_history_t *entry = NULL;
+
+ CRM_CHECK(lrm_state != NULL, return FALSE);
+ CRM_CHECK(rsc_id != NULL, return FALSE);
+ CRM_CHECK(op_type != NULL, return FALSE);
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ if (entry == NULL || entry->failed == NULL) {
+ return FALSE;
+ }
+
+ if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
+ && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
+ && entry->failed->interval_ms == interval_ms) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an executor action (actual or synthesized)
+ *
+ * \param[in] op Executor action to log result for
+ * \param[in] op_key Operation key for action
+ * \param[in] node_name Name of node action was performed on, if known
+ * \param[in] confirmed Whether to log that graph action was confirmed
+ */
+static void
+log_executor_event(const lrmd_event_data_t *op, const char *op_key,
+ const char *node_name, gboolean confirmed)
+{
+ int log_level = LOG_ERR;
+ GString *str = g_string_sized_new(100); // reasonable starting size
+
+ pcmk__g_strcat(str,
+ "Result of ", crm_action_str(op->op_type, op->interval_ms),
+ " operation for ", op->rsc_id, NULL);
+
+ if (node_name != NULL) {
+ pcmk__g_strcat(str, " on ", node_name, NULL);
+ }
+
+ switch (op->op_status) {
+ case PCMK_EXEC_DONE:
+ log_level = LOG_NOTICE;
+ pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
+ break;
+
+ case PCMK_EXEC_TIMEOUT:
+ pcmk__g_strcat(str,
+ ": ", pcmk_exec_status_str(op->op_status), " after ",
+ pcmk__readable_interval(op->timeout), NULL);
+ break;
+
+ case PCMK_EXEC_CANCELLED:
+ log_level = LOG_INFO;
+            /* The order of the __attribute__ and the "Fall through" comment
+             * is IMPORTANT! Do not change it without proper testing with both
+             * clang and gcc in multiple versions.
+             *
+             * The __clang__ check allows building with all versions of clang.
+             * The __has_c_attribute check works around a bug in the clang
+             * version shipped with RHEL 7, where __has_attribute would
+             * happily report the attribute as supported and then fail the
+             * build on the next line.
+             */
+#ifdef __clang__
+#ifdef __has_c_attribute
+#if __has_attribute(fallthrough)
+ __attribute__((fallthrough));
+#endif
+#endif
+#endif
+ // Fall through
+ default:
+ pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
+ NULL);
+ }
+
+ if ((op->exit_reason != NULL)
+ && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
+
+ pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
+ }
+
+ g_string_append(str, " " CRM_XS);
+ g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
+ (confirmed? "" : "un"), op->call_id, op_key);
+ if (op->op_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(str, " rc=%d", op->rc);
+ }
+
+ do_crm_log(log_level, "%s", str->str);
+ g_string_free(str, TRUE);
+
+ /* The services library has already logged the output at info or debug
+ * level, so just raise to notice if it looks like a failure.
+ */
+ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
+ char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
+ op->rsc_id, op->op_type,
+ op->interval_ms, node_name);
+
+ crm_log_output(LOG_NOTICE, prefix, op->output);
+ free(prefix);
+ }
+}
+
+void
+process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
+ active_op_t *pending, const xmlNode *action_xml)
+{
+ char *op_id = NULL;
+ char *op_key = NULL;
+
+ gboolean remove = FALSE;
+ gboolean removed = FALSE;
+ bool need_direct_ack = FALSE;
+ lrmd_rsc_info_t *rsc = NULL;
+ const char *node_name = NULL;
+
+ CRM_CHECK(op != NULL, return);
+ CRM_CHECK(op->rsc_id != NULL, return);
+
+ // Remap new status codes for older DCs
+ if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
+ switch (op->op_status) {
+ case PCMK_EXEC_NOT_CONNECTED:
+ lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
+ PCMK_EXEC_ERROR, op->exit_reason);
+ break;
+ case PCMK_EXEC_INVALID:
+ lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
+ op->exit_reason);
+ break;
+ default:
+ break;
+ }
+ }
+
+ op_id = make_stop_id(op->rsc_id, op->call_id);
+ op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
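+    // op_id ("stop ID") identifies this particular call (resource + call ID),
+    // while op_key identifies the action (resource + operation + interval)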
+
+ // Get resource info if available (from executor state or action XML)
+ if (lrm_state) {
+ rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
+ }
+ if ((rsc == NULL) && action_xml) {
+ xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
+
+ const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
+ const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
+ const char *type = crm_element_value(xml, XML_ATTR_TYPE);
+
+ if (standard && type) {
+ crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
+ op->rsc_id, standard,
+ (provider? ":" : ""), (provider? provider : ""), type);
+ rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
+ } else {
+ crm_err("Can't process %s result because %s agent information not cached or in XML",
+ op_key, op->rsc_id);
+ }
+ }
+
+ // Get node name if available (from executor state or action XML)
+ if (lrm_state) {
+ node_name = lrm_state->node_name;
+ } else if (action_xml) {
+ node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
+ }
+
+ if(pending == NULL) {
+ remove = TRUE;
+ if (lrm_state) {
+ pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
+ }
+ }
+
+ if (op->op_status == PCMK_EXEC_ERROR) {
+ switch(op->rc) {
+ case PCMK_OCF_NOT_RUNNING:
+ case PCMK_OCF_RUNNING_PROMOTED:
+ case PCMK_OCF_DEGRADED:
+ case PCMK_OCF_DEGRADED_PROMOTED:
+ // Leave it to the TE/scheduler to decide if this is an error
+ op->op_status = PCMK_EXEC_DONE;
+ break;
+ default:
+ /* Nothing to do */
+ break;
+ }
+ }
+
+ if (op->op_status != PCMK_EXEC_CANCELLED) {
+ /* We might not record the result, so directly acknowledge it to the
+ * originator instead, so it doesn't time out waiting for the result
+ * (especially important if part of a transition).
+ */
+ need_direct_ack = TRUE;
+
+ if (controld_action_is_recordable(op->op_type)) {
+ if (node_name && rsc) {
+ // We should record the result, and happily, we can
+ time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
+
+ controld_update_resource_history(node_name, rsc, op, lock_time);
+ need_direct_ack = FALSE;
+
+ } else if (op->rsc_deleted) {
+ /* We shouldn't record the result (likely the resource was
+ * refreshed, cleaned, or removed while this operation was
+ * in flight).
+ */
+ crm_notice("Not recording %s result in CIB because "
+ "resource information was removed since it was initiated",
+ op_key);
+ } else {
+ /* This shouldn't be possible; the executor didn't consider the
+ * resource deleted, but we couldn't find resource or node
+ * information.
+ */
+ crm_err("Unable to record %s result in CIB: %s", op_key,
+ (node_name? "No resource information" : "No node name"));
+ }
+ }
+
+ } else if (op->interval_ms == 0) {
+ /* A non-recurring operation was cancelled. Most likely, the
+ * never-initiated action was removed from the executor's pending
+ * operations list upon resource removal.
+ */
+ need_direct_ack = TRUE;
+
+ } else if (pending == NULL) {
+ /* This recurring operation was cancelled, but was not pending. No
+ * transition actions are waiting on it, nothing needs to be done.
+ */
+
+ } else if (op->user_data == NULL) {
+ /* This recurring operation was cancelled and pending, but we don't
+ * have a transition key. This should never happen.
+ */
+ crm_err("Recurring operation %s was cancelled without transition information",
+ op_key);
+
+ } else if (pcmk_is_set(pending->flags, active_op_remove)) {
+ /* This recurring operation was cancelled (by us) and pending, and we
+ * have been waiting for it to finish.
+ */
+ if (lrm_state) {
+ controld_delete_action_history(op);
+ }
+
+ /* Directly acknowledge failed recurring actions here. The above call to
+ * controld_delete_action_history() will not erase any corresponding
+ * last_failure entry, which means that the DC won't confirm the
+ * cancellation via process_op_deletion(), and the transition would
+ * otherwise wait for the action timer to pop.
+ */
+ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
+ pending->op_type, pending->interval_ms)) {
+ need_direct_ack = TRUE;
+ }
+
+ } else if (op->rsc_deleted) {
+ /* This recurring operation was cancelled (but not by us, and the
+ * executor does not have resource information, likely due to resource
+ * cleanup, refresh, or removal) and pending.
+ */
+ crm_debug("Recurring op %s was cancelled due to resource deletion",
+ op_key);
+ need_direct_ack = TRUE;
+
+ } else {
+ /* This recurring operation was cancelled (but not by us, likely by the
+ * executor before stopping the resource) and pending. We don't need to
+ * do anything special.
+ */
+ }
+
+ if (need_direct_ack) {
+ controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
+ }
+
+ if(remove == FALSE) {
+ /* The caller will do this afterwards, but keep the logging consistent */
+ removed = TRUE;
+
+ } else if (lrm_state && ((op->interval_ms == 0)
+ || (op->op_status == PCMK_EXEC_CANCELLED))) {
+
+ gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
+
+ if (op->interval_ms != 0) {
+ removed = TRUE;
+ } else if (found) {
+ removed = TRUE;
+ crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
+ op_key, op->call_id, op_id,
+ g_hash_table_size(lrm_state->active_ops));
+ }
+ }
+
+ log_executor_event(op, op_key, node_name, removed);
+
+ if (lrm_state) {
+ if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) {
+ crmd_alert_resource_op(lrm_state->node_name, op);
+ } else if (rsc && (op->rc == PCMK_OCF_OK)) {
+ char *metadata = unescape_newlines(op->output);
+
+ controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
+ free(metadata);
+ }
+ }
+
+ if (op->rsc_deleted) {
+ crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
+ if (lrm_state) {
+ delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
+ true);
+ }
+ }
+
+ /* If a shutdown was escalated while operations were pending,
+ * then the FSA will be stalled right now... allow it to continue
+ */
+ controld_trigger_fsa();
+ if (lrm_state && rsc) {
+ update_history_cache(lrm_state, rsc, op);
+ }
+
+ lrmd_free_rsc_info(rsc);
+ free(op_key);
+ free(op_id);
+}
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
new file mode 100644
index 0000000..8c68bfc
--- /dev/null
+++ b/daemons/controld/controld_execd_state.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/iso8601.h>
+#include <crm/pengine/rules.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+static GHashTable *lrm_state_table = NULL;
+extern GHashTable *proxy_table;
+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
+
+static void
+free_rsc_info(gpointer value)
+{
+ lrmd_rsc_info_t *rsc_info = value;
+
+ lrmd_free_rsc_info(rsc_info);
+}
+
+static void
+free_deletion_op(gpointer value)
+{
+ struct pending_deletion_op_s *op = value;
+
+ free(op->rsc);
+ delete_ha_msg_input(op->input);
+ free(op);
+}
+
+static void
+free_recurring_op(gpointer value)
+{
+ active_op_t *op = value;
+
+ free(op->user_data);
+ free(op->rsc_id);
+ free(op->op_type);
+ free(op->op_key);
+ if (op->params) {
+ g_hash_table_destroy(op->params);
+ }
+ free(op);
+}
+
+static gboolean
+fail_pending_op(gpointer key, gpointer value, gpointer user_data)
+{
+ lrmd_event_data_t event = { 0, };
+ lrm_state_t *lrm_state = user_data;
+ active_op_t *op = value;
+
+ crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)",
+ op->rsc_id, op->op_type, op->interval_ms,
+ lrm_state->node_name, (char*)key, op->user_data);
+
+ event.type = lrmd_event_exec_complete;
+ event.rsc_id = op->rsc_id;
+ event.op_type = op->op_type;
+ event.user_data = op->user_data;
+ event.timeout = 0;
+ event.interval_ms = op->interval_ms;
+ lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Action was pending when executor connection was dropped");
+ event.t_run = (unsigned int) op->start_time;
+ event.t_rcchange = (unsigned int) op->start_time;
+
+ event.call_id = op->call_id;
+ event.remote_nodename = lrm_state->node_name;
+ event.params = op->params;
+
+ process_lrm_event(lrm_state, &event, op, NULL);
+ lrmd__reset_result(&event);
+ return TRUE;
+}
+
+gboolean
+lrm_state_is_local(lrm_state_t *lrm_state)
+{
+ return (lrm_state != NULL)
+ && pcmk__str_eq(lrm_state->node_name, controld_globals.our_nodename,
+ pcmk__str_casei);
+}
+
+/*!
+ * \internal
+ * \brief Create executor state entry for a node and add it to the state table
+ *
+ * \param[in] node_name Node to create entry for
+ *
+ * \return Newly allocated executor state object initialized for \p node_name
+ */
+static lrm_state_t *
+lrm_state_create(const char *node_name)
+{
+ lrm_state_t *state = NULL;
+
+ if (!node_name) {
+ crm_err("No node name given for lrm state object");
+ return NULL;
+ }
+
+ state = calloc(1, sizeof(lrm_state_t));
+ if (!state) {
+ return NULL;
+ }
+
+ state->node_name = strdup(node_name);
+ state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
+ state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
+ state->active_ops = pcmk__strkey_table(free, free_recurring_op);
+ state->resource_history = pcmk__strkey_table(NULL, history_free);
+ state->metadata_cache = metadata_cache_new();
+
+ g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
+ return state;
+}
+
+void
+lrm_state_destroy(const char *node_name)
+{
+ g_hash_table_remove(lrm_state_table, node_name);
+}
+
+static gboolean
+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
+{
+ remote_proxy_t *proxy = value;
+ const char *node_name = user_data;
+
+ if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static remote_proxy_t *
+find_connected_proxy_by_node(const char * node_name)
+{
+ GHashTableIter gIter;
+ remote_proxy_t *proxy = NULL;
+
+ CRM_CHECK(proxy_table != NULL, return NULL);
+
+ g_hash_table_iter_init(&gIter, proxy_table);
+
+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
+ if (proxy->source
+ && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+ return proxy;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+remote_proxy_disconnect_by_node(const char * node_name)
+{
+ remote_proxy_t *proxy = NULL;
+
+ CRM_CHECK(proxy_table != NULL, return);
+
+ while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
+        /* mainloop_del_ipc_client() eventually calls
+         * remote_proxy_disconnected(), which removes the entry from
+         * proxy_table. Do not do this inside a g_hash_table_iter_next() loop.
+         */
+ if (proxy->source) {
+ mainloop_del_ipc_client(proxy->source);
+ }
+ }
+
+ return;
+}
+
+static void
+internal_lrm_state_destroy(gpointer data)
+{
+ lrm_state_t *lrm_state = data;
+
+ if (!lrm_state) {
+ return;
+ }
+
+    /* Rather than removing the recorded proxy entries from proxy_table
+     * directly, make sure any connected proxies get disconnected, so that
+     * remote_proxy_disconnected() is called and removes the entries from
+     * proxy_table.
+     */
+ remote_proxy_disconnect_by_node(lrm_state->node_name);
+
+ crm_trace("Destroying proxy table %s with %u members",
+ lrm_state->node_name, g_hash_table_size(proxy_table));
+    // Just in case any leftovers remain in proxy_table
+ g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
+ remote_ra_cleanup(lrm_state);
+ lrmd_api_delete(lrm_state->conn);
+
+ if (lrm_state->rsc_info_cache) {
+ crm_trace("Destroying rsc info cache with %u members",
+ g_hash_table_size(lrm_state->rsc_info_cache));
+ g_hash_table_destroy(lrm_state->rsc_info_cache);
+ }
+ if (lrm_state->resource_history) {
+ crm_trace("Destroying history op cache with %u members",
+ g_hash_table_size(lrm_state->resource_history));
+ g_hash_table_destroy(lrm_state->resource_history);
+ }
+ if (lrm_state->deletion_ops) {
+ crm_trace("Destroying deletion op cache with %u members",
+ g_hash_table_size(lrm_state->deletion_ops));
+ g_hash_table_destroy(lrm_state->deletion_ops);
+ }
+ if (lrm_state->active_ops != NULL) {
+ crm_trace("Destroying pending op cache with %u members",
+ g_hash_table_size(lrm_state->active_ops));
+ g_hash_table_destroy(lrm_state->active_ops);
+ }
+ metadata_cache_free(lrm_state->metadata_cache);
+
+ free((char *)lrm_state->node_name);
+ free(lrm_state);
+}
+
+void
+lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
+{
+ if (lrm_state->resource_history) {
+ crm_trace("Resetting resource history cache with %u members",
+ g_hash_table_size(lrm_state->resource_history));
+ g_hash_table_remove_all(lrm_state->resource_history);
+ }
+ if (lrm_state->deletion_ops) {
+ crm_trace("Resetting deletion operations cache with %u members",
+ g_hash_table_size(lrm_state->deletion_ops));
+ g_hash_table_remove_all(lrm_state->deletion_ops);
+ }
+ if (lrm_state->active_ops != NULL) {
+ crm_trace("Resetting active operations cache with %u members",
+ g_hash_table_size(lrm_state->active_ops));
+ g_hash_table_remove_all(lrm_state->active_ops);
+ }
+ if (lrm_state->rsc_info_cache) {
+ crm_trace("Resetting resource information cache with %u members",
+ g_hash_table_size(lrm_state->rsc_info_cache));
+ g_hash_table_remove_all(lrm_state->rsc_info_cache);
+ }
+ if (reset_metadata) {
+ metadata_cache_reset(lrm_state->metadata_cache);
+ }
+}
+
+gboolean
+lrm_state_init_local(void)
+{
+ if (lrm_state_table) {
+ return TRUE;
+ }
+
+ lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy);
+ if (!lrm_state_table) {
+ return FALSE;
+ }
+
+ proxy_table = pcmk__strikey_table(NULL, remote_proxy_free);
+ if (!proxy_table) {
+ g_hash_table_destroy(lrm_state_table);
+ lrm_state_table = NULL;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void
+lrm_state_destroy_all(void)
+{
+ if (lrm_state_table) {
+ crm_trace("Destroying state table with %u members",
+ g_hash_table_size(lrm_state_table));
+ g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL;
+ }
+ if(proxy_table) {
+ crm_trace("Destroying proxy table with %u members",
+ g_hash_table_size(proxy_table));
+ g_hash_table_destroy(proxy_table); proxy_table = NULL;
+ }
+}
+
+lrm_state_t *
+lrm_state_find(const char *node_name)
+{
+ if (!node_name) {
+ return NULL;
+ }
+ return g_hash_table_lookup(lrm_state_table, node_name);
+}
+
+lrm_state_t *
+lrm_state_find_or_create(const char *node_name)
+{
+ lrm_state_t *lrm_state;
+
+ lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
+ if (!lrm_state) {
+ lrm_state = lrm_state_create(node_name);
+ }
+
+ return lrm_state;
+}
+
+GList *
+lrm_state_get_list(void)
+{
+ return g_hash_table_get_values(lrm_state_table);
+}
+
+void
+lrm_state_disconnect_only(lrm_state_t * lrm_state)
+{
+ int removed = 0;
+
+ if (!lrm_state->conn) {
+ return;
+ }
+ crm_trace("Disconnecting %s", lrm_state->node_name);
+
+ remote_proxy_disconnect_by_node(lrm_state->node_name);
+
+ ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
+
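+    /* Unless the controller is shutting down, synthesize failure results for
+     * any still-pending actions, so nothing waits forever on results that
+     * will never arrive
+     */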
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ fail_pending_op, lrm_state);
+ crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
+ }
+}
+
+void
+lrm_state_disconnect(lrm_state_t * lrm_state)
+{
+ if (!lrm_state->conn) {
+ return;
+ }
+
+ lrm_state_disconnect_only(lrm_state);
+
+ lrmd_api_delete(lrm_state->conn);
+ lrm_state->conn = NULL;
+}
+
+int
+lrm_state_is_connected(lrm_state_t * lrm_state)
+{
+ if (!lrm_state->conn) {
+ return FALSE;
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
+}
+
+int
+lrm_state_poke_connection(lrm_state_t * lrm_state)
+{
+
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
+}
+
+// \return Standard Pacemaker return code
+int
+controld_connect_local_executor(lrm_state_t *lrm_state)
+{
+ int rc = pcmk_rc_ok;
+
+ if (lrm_state->conn == NULL) {
+ lrmd_t *api = NULL;
+
+ rc = lrmd__new(&api, NULL, NULL, 0);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+ api->cmds->set_callback(api, lrm_op_callback);
+ lrm_state->conn = api;
+ }
+
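+    // connect() returns a legacy (pcmk_ok-style) code; convert it to a
+    // standard Pacemaker return code for our caller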
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn,
+ CRM_SYSTEM_CRMD, NULL);
+ rc = pcmk_legacy2rc(rc);
+
+ if (rc == pcmk_rc_ok) {
+ lrm_state->num_lrm_register_fails = 0;
+ } else {
+ lrm_state->num_lrm_register_fails++;
+ }
+ return rc;
+}
+
+static remote_proxy_t *
+crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel)
+{
+ struct ipc_client_callbacks proxy_callbacks = {
+ .dispatch = remote_proxy_dispatch,
+ .destroy = remote_proxy_disconnected
+ };
+ remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name,
+ session_id, channel);
+ return proxy;
+}
+
+gboolean
+crmd_is_proxy_session(const char *session)
+{
+ return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
+}
+
+void
+crmd_proxy_send(const char *session, xmlNode *msg)
+{
+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
+ lrm_state_t *lrm_state = NULL;
+
+ if (!proxy) {
+ return;
+ }
+ crm_log_xml_trace(msg, "to-proxy");
+ lrm_state = lrm_state_find(proxy->node_name);
+ if (lrm_state) {
+ crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
+ remote_proxy_relay_event(proxy, msg);
+ }
+}
+
+static void
+crmd_proxy_dispatch(const char *session, xmlNode *msg)
+{
+ crm_trace("Processing proxied IPC message from session %s", session);
+ crm_log_xml_trace(msg, "controller[inbound]");
+ crm_xml_add(msg, F_CRM_SYS_FROM, session);
+ if (controld_authorize_ipc_message(msg, NULL, session)) {
+ route_message(C_IPC_MESSAGE, msg);
+ }
+ controld_trigger_fsa();
+}
+
+static void
+remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc != pcmk_ok) {
+ crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
+
+ if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
+ crm_err("The cluster is mis-configured - shutting down and staying down");
+ }
+
+ } else {
+ lrmd_t * lrmd = (lrmd_t *)user_data;
+ crm_time_t *now = crm_time_new(NULL);
+ GHashTable *config_hash = pcmk__strkey_table(free, free);
+
+ crm_debug("Call %d : Parsing CIB options", call_id);
+
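+        // Unpack the crm_config properties into a name/value table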
+ pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL,
+ config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
+
+ /* Now send it to the remote peer */
+ lrmd__validate_remote_settings(lrmd, config_hash);
+
+ g_hash_table_destroy(config_hash);
+ crm_time_free(now);
+ }
+}
+
+static void
+crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
+{
+ lrm_state_t *lrm_state = userdata;
+ const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
+
+ const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
+ if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) {
+ const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
+
+ proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel);
+ if (!remote_ra_controlling_guest(lrm_state)) {
+ if (proxy != NULL) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ /* Look up stonith-watchdog-timeout and send to the remote peer for validation */
+ int rc = cib_conn->cmds->query(cib_conn, XML_CIB_TAG_CRMCONFIG,
+ NULL, cib_scope_local);
+ cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE,
+ lrmd,
+ "remote_config_check",
+ remote_config_check,
+ NULL);
+ }
+ } else {
+ crm_debug("Skipping remote_config_check for guest-nodes");
+ }
+
+ } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) {
+ char *now_s = NULL;
+
+ crm_notice("%s requested shutdown of its remote connection",
+ lrm_state->node_name);
+
+ if (!remote_ra_is_in_maintenance(lrm_state)) {
+ now_s = pcmk__ttoa(time(NULL));
+ update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
+ free(now_s);
+
+ remote_proxy_ack_shutdown(lrmd);
+
+ crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
+ lrm_state->node_name);
+ } else {
+ remote_proxy_nack_shutdown(lrmd);
+
+ crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
+ lrm_state->node_name);
+ }
+ return;
+
+ } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) {
+ /* This is for the controller, which we are, so don't try
+ * to send to ourselves over IPC -- do it directly.
+ */
+ int flags = 0;
+ xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
+
+ CRM_CHECK(request != NULL, return);
+ CRM_CHECK(lrm_state->node_name, return);
+ crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
+ pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
+
+ /* Pacemaker Remote nodes don't know their own names (as known to the
+ * cluster). When getting a node info request with no name or ID, add
+ * the name, so we don't return info for ourselves instead of the
+ * Pacemaker Remote node.
+ */
+ if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_casei)) {
+ int node_id = 0;
+
+ crm_element_value_int(request, XML_ATTR_ID, &node_id);
+ if ((node_id <= 0)
+ && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) {
+ crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name);
+ }
+ }
+
+ crmd_proxy_dispatch(session, request);
+
+ crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
+ if (flags & crm_ipc_client_response) {
+ int msg_id = 0;
+ xmlNode *op_reply = create_xml_node(NULL, "ack");
+
+ crm_xml_add(op_reply, "function", __func__);
+ crm_xml_add_int(op_reply, "line", __LINE__);
+
+ crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
+ remote_proxy_relay_response(proxy, op_reply, msg_id);
+
+ free_xml(op_reply);
+ }
+
+ } else {
+ remote_proxy_cb(lrmd, lrm_state->node_name, msg);
+ }
+}
+
+
+// \return Standard Pacemaker return code
+int
+controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
+ int port, int timeout_ms)
+{
+ int rc = pcmk_rc_ok;
+
+ if (lrm_state->conn == NULL) {
+ lrmd_t *api = NULL;
+
+ rc = lrmd__new(&api, lrm_state->node_name, server, port);
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Pacemaker Remote connection to %s:%s failed: %s "
+ CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc);
+
+ return rc;
+ }
+ lrm_state->conn = api;
+ api->cmds->set_callback(api, remote_lrm_op_callback);
+ lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb);
+ }
+
+ crm_trace("Initiating remote connection to %s:%d with timeout %dms",
+ server, port, timeout_ms);
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn,
+ lrm_state->node_name,
+ timeout_ms);
+ if (rc == pcmk_ok) {
+ lrm_state->num_lrm_register_fails = 0;
+ } else {
+ lrm_state->num_lrm_register_fails++; // Ignored for remote connections
+ }
+ return pcmk_legacy2rc(rc);
+}
+
+int
+lrm_state_get_metadata(lrm_state_t * lrm_state,
+ const char *class,
+ const char *provider,
+ const char *agent, char **output, enum lrmd_call_options options)
+{
+ lrmd_key_value_t *params = NULL;
+
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+ /* Add the node name to the environment, as is done with normal resource
+ * action calls. Meta-data calls shouldn't need it, but some agents are
+ * written with an ocf_local_nodename call at the beginning regardless of
+ * action. Without the environment variable, the agent would try to contact
+ * the controller to get the node name -- but the controller would be
+ * blocking on the synchronous meta-data call.
+ *
+ * At this point, we have to assume that agents are unlikely to make other
+ * calls that require the controller, such as crm_node --quorum or
+ * --cluster-id.
+ *
+ * @TODO Make meta-data calls asynchronous. (This will be part of a larger
+ * project to make meta-data calls via the executor rather than directly.)
+ */
+ params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET,
+ lrm_state->node_name);
+
+ return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn,
+ class, provider, agent, output, options, params);
+}
+
+int
+lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
+ guint interval_ms)
+{
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+    /* @TODO Figure out a way to make this asynchronous. Currently the cancel
+     * is synchronous and directly acknowledged in do_lrm_invoke().
+     */
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id,
+ action, interval_ms);
+}
+
+lrmd_rsc_info_t *
+lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
+{
+ lrmd_rsc_info_t *rsc = NULL;
+
+ if (!lrm_state->conn) {
+ return NULL;
+ }
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ return remote_ra_get_rsc_info(lrm_state, rsc_id);
+ }
+
+ rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
+ if (rsc == NULL) {
+ /* only contact the lrmd if we don't already have a cached rsc info */
+ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
+ if (rsc == NULL) {
+ return NULL;
+ }
+ /* cache the result */
+ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
+ }
+
+ return lrmd_copy_rsc_info(rsc);
+
+}
+
+/*!
+ * \internal
+ * \brief Initiate a resource agent action
+ *
+ * \param[in,out] lrm_state Executor state object
+ * \param[in] rsc_id ID of resource for action
+ * \param[in] action Action to execute
+ * \param[in] userdata String to copy and pass to execution callback
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in] timeout_ms Action timeout (in milliseconds)
+ * \param[in] start_delay_ms Delay (in ms) before initiating action
+ * \param[in] parameters Hash table of resource parameters
+ * \param[out] call_id Where to store call ID on success
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, GHashTable *parameters,
+ int *call_id)
+{
+ int rc = pcmk_rc_ok;
+ lrmd_key_value_t *params = NULL;
+
+ if (lrm_state->conn == NULL) {
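+        // Positive errno, since this function returns standard Pacemaker codes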
+ return ENOTCONN;
+ }
+
+ // Convert parameters from hash table to list
+ if (parameters != NULL) {
+ const char *key = NULL;
+ const char *value = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, parameters);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ params = lrmd_key_value_add(params, key, value);
+ }
+ }
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ rc = controld_execute_remote_agent(lrm_state, rsc_id, action,
+ userdata, interval_ms, timeout_ms,
+ start_delay_ms, params, call_id);
+
+ } else {
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id,
+ action, userdata,
+ interval_ms, timeout_ms,
+ start_delay_ms,
+ lrmd_opt_notify_changes_only,
+ params);
+ if (rc < 0) {
+ rc = pcmk_legacy2rc(rc);
+ } else {
+ *call_id = rc;
+ rc = pcmk_rc_ok;
+ }
+ }
+ return rc;
+}
+
+int
+lrm_state_register_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id,
+ const char *class,
+ const char *provider, const char *agent, enum lrmd_call_options options)
+{
+ lrmd_t *conn = (lrmd_t *) lrm_state->conn;
+
+ if (conn == NULL) {
+ return -ENOTCONN;
+ }
+
+ if (is_remote_lrmd_ra(agent, provider, NULL)) {
+ return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
+ }
+
+ /* @TODO Implement an asynchronous version of this (currently a blocking
+ * call to the lrmd).
+ */
+ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
+ agent, options);
+}
+
+int
+lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options)
+{
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ lrm_state_destroy(rsc_id);
+ return pcmk_ok;
+ }
+
+ g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
+
+ /* @TODO Optimize this ... this function is a blocking round trip from
+ * client to daemon. The controld_execd_state.c code path that uses this
+ * function should always treat it as an async operation. The executor API
+ * should make an async version available.
+ */
+ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
+}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
new file mode 100644
index 0000000..89cb61f
--- /dev/null
+++ b/daemons/controld/controld_fencing.c
@@ -0,0 +1,1108 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+
+#include <pacemaker-controld.h>
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
+
+/*
+ * stonith failure counting
+ *
+ * We don't want to get stuck in a permanent fencing loop. Keep track of the
+ * number of fencing failures for each target node, and the most we'll restart a
+ * transition for.
+ */
+
+struct st_fail_rec {
+ int count;
+};
+
+static bool fence_reaction_panic = false;
+static unsigned long int stonith_max_attempts = 10;
+static GHashTable *stonith_failures = NULL;
+
+/*!
+ * \internal
+ * \brief Update max fencing attempts before giving up
+ *
+ * \param[in] value New max fencing attempts
+ */
+static void
+update_stonith_max_attempts(const char *value)
+{
+    int score = char2score(value);
+
+    // A negative score would wrap if assigned directly to the unsigned counter
+    if (score < 1) {
+        stonith_max_attempts = 10UL;
+    } else {
+        stonith_max_attempts = (unsigned long int) score;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Configure reaction to notification of local node being fenced
+ *
+ * \param[in] reaction_s Reaction type
+ */
+static void
+set_fence_reaction(const char *reaction_s)
+{
+ if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
+ fence_reaction_panic = true;
+
+ } else {
+ if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
+ crm_warn("Invalid value '%s' for %s, using 'stop'",
+ reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
+ }
+ fence_reaction_panic = false;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Configure fencing options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_fencing(GHashTable *options)
+{
+ const char *value = NULL;
+
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
+ set_fence_reaction(value);
+
+ value = g_hash_table_lookup(options, "stonith-max-attempts");
+ update_stonith_max_attempts(value);
+}
+
+static gboolean
+too_many_st_failures(const char *target)
+{
+ GHashTableIter iter;
+ const char *key = NULL;
+ struct st_fail_rec *value = NULL;
+
+ if (stonith_failures == NULL) {
+ return FALSE;
+ }
+
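+    // With no specific target, check whether any node has reached the limit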
+ if (target == NULL) {
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+
+ if (value->count >= stonith_max_attempts) {
+ target = (const char*)key;
+ goto too_many;
+ }
+ }
+ } else {
+ value = g_hash_table_lookup(stonith_failures, target);
+ if ((value != NULL) && (value->count >= stonith_max_attempts)) {
+ goto too_many;
+ }
+ }
+ return FALSE;
+
+too_many:
+ crm_warn("Too many failures (%d) to fence %s, giving up",
+ value->count, target);
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Reset a stonith fail count
+ *
+ * \param[in] target Name of node to reset, or NULL for all
+ */
+void
+st_fail_count_reset(const char *target)
+{
+ if (stonith_failures == NULL) {
+ return;
+ }
+
+ if (target) {
+ struct st_fail_rec *rec = NULL;
+
+ rec = g_hash_table_lookup(stonith_failures, target);
+ if (rec) {
+ rec->count = 0;
+ }
+ } else {
+ GHashTableIter iter;
+ const char *key = NULL;
+ struct st_fail_rec *rec = NULL;
+
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &rec)) {
+ rec->count = 0;
+ }
+ }
+}
+
+static void
+st_fail_count_increment(const char *target)
+{
+ struct st_fail_rec *rec = NULL;
+
+ if (stonith_failures == NULL) {
+ stonith_failures = pcmk__strkey_table(free, free);
+ }
+
+ rec = g_hash_table_lookup(stonith_failures, target);
+ if (rec) {
+ rec->count++;
+ } else {
+ rec = malloc(sizeof(struct st_fail_rec));
+ if(rec == NULL) {
+ return;
+ }
+
+ rec->count = 1;
+ g_hash_table_insert(stonith_failures, strdup(target), rec);
+ }
+}
+
+/* end stonith fail count functions */
+
+
+static void
+cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ if (rc < pcmk_ok) {
+ crm_err("Fencing update %d for %s: failed - %s (%d)",
+ call_id, (char *)user_data, pcmk_strerror(rc), rc);
+ crm_log_xml_warn(msg, "Failed update");
+ abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
+ NULL);
+
+ } else {
+ crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
+ }
+}
+
+static void
+send_stonith_update(pcmk__graph_action_t *action, const char *target,
+ const char *uuid)
+{
+ int rc = pcmk_ok;
+ crm_node_t *peer = NULL;
+
+ /* We (usually) rely on the membership layer to do node_update_cluster,
+ * and the peer status callback to do node_update_peer, because the node
+ * might have already rejoined before we get the stonith result here.
+ */
+ int flags = node_update_join | node_update_expected;
+
+ /* zero out the node-status & remove all LRM status info */
+ xmlNode *node_state = NULL;
+
+ CRM_CHECK(target != NULL, return);
+ CRM_CHECK(uuid != NULL, return);
+
+ /* Make sure the membership and join caches are accurate */
+ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
+
+ CRM_CHECK(peer != NULL, return);
+
+ if (peer->state == NULL) {
+ /* Usually, we rely on the membership layer to update the cluster state
+ * in the CIB. However, if the node has never been seen, do it here, so
+ * the node is not considered unclean.
+ */
+ flags |= node_update_cluster;
+ }
+
+ if (peer->uuid == NULL) {
+ crm_info("Recording uuid '%s' for node '%s'", uuid, target);
+ peer->uuid = strdup(uuid);
+ }
+
+ crmd_peer_down(peer, TRUE);
+
+ /* Generate a node state update for the CIB */
+ node_state = create_node_state_update(peer, flags, NULL, __func__);
+
+ /* we have to mark whether or not remote nodes have already been fenced */
+ if (peer->flags & crm_remote_node) {
+ char *now_s = pcmk__ttoa(time(NULL));
+
+ crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
+ free(now_s);
+ }
+
+ /* Force our known ID */
+ crm_xml_add(node_state, XML_ATTR_ID, uuid);
+
+ rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ XML_CIB_TAG_STATUS, node_state,
+ cib_scope_local
+ |cib_can_create);
+
+ /* Delay processing the trigger until the update completes */
+ crm_debug("Sending fencing update %d for %s", rc, target);
+ fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
+
+ // Make sure it sticks
+ /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
+ * cib_scope_local);
+ */
+
+ controld_delete_node_state(peer->uname, controld_section_all,
+ cib_scope_local);
+ free_xml(node_state);
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Abort transition due to stonith failure
+ *
+ * \param[in] abort_action Whether to restart or stop transition
+ * \param[in] target  Don't restart if this node (or any node, if NULL) has
+ *                    too many failures
+ * \param[in] reason Log this stonith action XML as abort reason (or NULL)
+ */
+static void
+abort_for_stonith_failure(enum pcmk__graph_next abort_action,
+ const char *target, const xmlNode *reason)
+{
+ /* If stonith repeatedly fails, we eventually give up on starting a new
+ * transition for that reason.
+ */
+ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
+ abort_action = pcmk__graph_wait;
+ }
+ abort_transition(INFINITY, abort_action, "Stonith failed", reason);
+}
+
+
+/*
+ * stonith cleanup list
+ *
+ * If the DC is shot, proper notifications might not go out.
+ * The stonith cleanup list allows the cluster to (re-)send
+ * notifications once a new DC is elected.
+ */
+
+static GList *stonith_cleanup_list = NULL;
+
+/*!
+ * \internal
+ * \brief Add a node to the stonith cleanup list
+ *
+ * \param[in] target Name of node to add
+ */
+void
+add_stonith_cleanup(const char *target) {
+ stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
+}
+
+/*!
+ * \internal
+ * \brief Remove a node from the stonith cleanup list
+ *
+ * \param[in] target  Name of node to remove
+ */
+void
+remove_stonith_cleanup(const char *target)
+{
+ GList *iter = stonith_cleanup_list;
+
+ while (iter != NULL) {
+ GList *tmp = iter;
+ char *iter_name = tmp->data;
+
+ iter = iter->next;
+ if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
+ crm_trace("Removing %s from the cleanup list", iter_name);
+ stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
+ free(iter_name);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Purge all entries from the stonith cleanup list
+ */
+void
+purge_stonith_cleanup(void)
+{
+ if (stonith_cleanup_list) {
+ GList *iter = NULL;
+
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+ char *target = iter->data;
+
+ crm_info("Purging %s from stonith cleanup list", target);
+ free(target);
+ }
+ g_list_free(stonith_cleanup_list);
+ stonith_cleanup_list = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Send stonith updates for all entries in cleanup list, then purge it
+ */
+void
+execute_stonith_cleanup(void)
+{
+ GList *iter;
+
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+ char *target = iter->data;
+ crm_node_t *target_node = crm_get_peer(0, target);
+ const char *uuid = crm_peer_uuid(target_node);
+
+ crm_notice("Marking %s, target of a previous stonith action, as clean", target);
+ send_stonith_update(NULL, target, uuid);
+ free(target);
+ }
+ g_list_free(stonith_cleanup_list);
+ stonith_cleanup_list = NULL;
+}
+
+/* end stonith cleanup list functions */
+
+
+/* stonith API client
+ *
+ * Functions that need to interact directly with the fencer via its API
+ */
+
+static stonith_t *stonith_api = NULL;
+static crm_trigger_t *stonith_reconnect = NULL;
+static char *te_client_id = NULL;
+
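+/*!
+ * \internal
+ * \brief Fail pending fencing actions if the fencer connection is lost
+ *
+ * Mark all unconfirmed fencing actions in a transition graph as failed,
+ * and abort the transition if any were found.
+ *
+ * \param[in,out] graph Transition graph to check (may be NULL)
+ *
+ * \return TRUE if any action was failed, otherwise FALSE
+ */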
+static gboolean
+fail_incompletable_stonith(pcmk__graph_t *graph)
+{
+ GList *lpc = NULL;
+ const char *task = NULL;
+ xmlNode *last_action = NULL;
+
+ if (graph == NULL) {
+ return FALSE;
+ }
+
+ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ GList *lpc2 = NULL;
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+ continue;
+ }
+
+ for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
+
+ if ((action->type != pcmk__cluster_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ continue;
+ }
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ last_action = action->xml;
+ pcmk__update_graph(graph, action);
+ crm_notice("Failing action %d (%s): fencer terminated",
+ action->id, ID(action->xml));
+ }
+ }
+ }
+
+ if (last_action != NULL) {
+ crm_warn("Fencer failure resulted in unrunnable actions");
+ abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
+{
+ te_cleanup_stonith_history_sync(st, FALSE);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
+ crm_crit("Fencing daemon connection failed");
+ mainloop_set_trigger(stonith_reconnect);
+
+ } else {
+ crm_info("Fencing daemon disconnected");
+ }
+
+ if (stonith_api) {
+ /* the client API won't properly reconnect notifications
+ * if they are still in the table - so remove them
+ */
+ if (stonith_api->state != stonith_disconnected) {
+ stonith_api->cmds->disconnect(st);
+ }
+ stonith_api->cmds->remove_notification(stonith_api, NULL);
+ }
+
+ if (AM_I_DC) {
+ fail_incompletable_stonith(controld_globals.transition_graph);
+ trigger_graph();
+ }
+}
+
+/*!
+ * \internal
+ * \brief Handle an event notification from the fencing API
+ *
+ * \param[in] st Fencing API connection (ignored)
+ * \param[in] event Fencing API event notification
+ */
+static void
+handle_fence_notification(stonith_t *st, stonith_event_t *event)
+{
+ bool succeeded = true;
+ const char *executioner = "the cluster";
+ const char *client = "a client";
+ const char *reason = NULL;
+ int exec_status;
+
+ if (te_client_id == NULL) {
+ te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
+ (unsigned long) getpid());
+ }
+
+ if (event == NULL) {
+ crm_err("Notify data not found");
+ return;
+ }
+
+ if (event->executioner != NULL) {
+ executioner = event->executioner;
+ }
+ if (event->client_origin != NULL) {
+ client = event->client_origin;
+ }
+
+ exec_status = stonith__event_execution_status(event);
+ if ((stonith__event_exit_status(event) != CRM_EX_OK)
+ || (exec_status != PCMK_EXEC_DONE)) {
+ succeeded = false;
+ if (exec_status == PCMK_EXEC_DONE) {
+ exec_status = PCMK_EXEC_ERROR;
+ }
+ }
+ reason = stonith__event_exit_reason(event);
+
+ crmd_alert_fencing_op(event);
+
+ if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
+ // Unfencing doesn't need special handling, just a log message
+ if (succeeded) {
+ crm_notice("%s was unfenced by %s at the request of %s@%s",
+ event->target, executioner, client, event->origin);
+ } else {
+ crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
+ event->target, executioner,
+ pcmk_exec_status_str(exec_status),
+ ((reason == NULL)? "" : ": "),
+ ((reason == NULL)? "" : reason),
+ stonith__event_exit_status(event));
+ }
+ return;
+ }
+
+ if (succeeded
+ && pcmk__str_eq(event->target, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ /* We were notified of our own fencing. Most likely, either fencing was
+ * misconfigured, or fabric fencing that doesn't cut cluster
+ * communication is in use.
+ *
+ * Either way, shutting down the local host is a good idea, to require
+ * administrator intervention. Also, other nodes would otherwise likely
+ * set our status to lost because of the fencing callback and discard
+ * our subsequent election votes as "not part of our cluster".
+ */
+ crm_crit("We were allegedly just fenced by %s for %s!",
+ executioner, event->origin); // Dumps blackbox if enabled
+ if (fence_reaction_panic) {
+ pcmk__panic(__func__);
+ } else {
+ crm_exit(CRM_EX_FATAL);
+ }
+ return; // Should never get here
+ }
+
+ /* Update the count of fencing failures for this target, in case we become
+ * DC later. The current DC has already updated its fail count in
+ * tengine_stonith_callback().
+ */
+ if (!AM_I_DC) {
+ if (succeeded) {
+ st_fail_count_reset(event->target);
+ } else {
+ st_fail_count_increment(event->target);
+ }
+ }
+
+ crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
+ "%s%s%s%s " CRM_XS " event=%s",
+ event->target, (succeeded? "" : " not"),
+ event->action, executioner, client, event->origin,
+ (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
+ ((reason == NULL)? "" : " ("),
+ ((reason == NULL)? "" : reason),
+ ((reason == NULL)? "" : ")"),
+ event->id);
+
+ if (succeeded) {
+ crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
+ CRM_GET_PEER_ANY);
+ const char *uuid = NULL;
+
+ if (peer == NULL) {
+ return;
+ }
+
+ uuid = crm_peer_uuid(peer);
+
+ if (AM_I_DC) {
+ /* The DC always sends updates */
+ send_stonith_update(NULL, event->target, uuid);
+
+ /* @TODO Ideally, at this point, we'd check whether the fenced node
+ * hosted any guest nodes, and call remote_node_down() for them.
+ * Unfortunately, the controller doesn't have a simple, reliable way
+ * to map hosts to guests. It might be possible to track this in the
+ * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
+ * on the scheduler creating fence pseudo-events for the guests.
+ */
+
+ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
+ /* Abort the current transition if it wasn't the cluster that
+ * initiated fencing.
+ */
+ crm_info("External fencing operation from %s fenced %s",
+ client, event->target);
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "External Fencing Operation", NULL);
+ }
+
+ } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
+ pcmk__str_null_matches|pcmk__str_casei)
+ && !pcmk_is_set(peer->flags, crm_remote_node)) {
+ // Assume the target was our DC if we don't currently have one
+
+ if (controld_globals.dc_name != NULL) {
+ crm_notice("Fencing target %s was our DC", event->target);
+ } else {
+ crm_notice("Fencing target %s may have been our DC",
+ event->target);
+ }
+
+ /* Given the CIB resyncing that occurs around elections,
+ * have one node update the CIB now and, if the new DC is different,
+ * have them do so too after the election
+ */
+ if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ send_stonith_update(NULL, event->target, uuid);
+ }
+ add_stonith_cleanup(event->target);
+ }
+
+ /* If the target is a remote node, and we host its connection,
+ * immediately fail all monitors so it can be recovered quickly.
+ * The connection won't necessarily drop when a remote node is fenced,
+ * so the failure might not otherwise be detected until the next poke.
+ */
+ if (pcmk_is_set(peer->flags, crm_remote_node)) {
+ remote_ra_fail(event->target);
+ }
+
+ crmd_peer_down(peer, TRUE);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Connect to fencer
+ *
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ *
+ * \return TRUE
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
+ */
+static gboolean
+te_connect_stonith(gpointer user_data)
+{
+ int rc = pcmk_ok;
+
+ if (stonith_api == NULL) {
+ stonith_api = stonith_api_new();
+ if (stonith_api == NULL) {
+ crm_err("Could not connect to fencer: API memory allocation failed");
+ return TRUE;
+ }
+ }
+
+ if (stonith_api->state != stonith_disconnected) {
+ crm_trace("Already connected to fencer, no need to retry");
+ return TRUE;
+ }
+
+ if (user_data == NULL) {
+ // Blocking (retry failures now until successful)
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 30 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ }
+ } else {
+ // Non-blocking (retry failures later in main loop)
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+ if (rc != pcmk_ok) {
+ if (pcmk_is_set(controld_globals.fsa_input_register,
+ R_ST_REQUIRED)) {
+ crm_notice("Fencer connection failed (will retry): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ mainloop_set_trigger(stonith_reconnect);
+ } else {
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ }
+ return TRUE;
+ }
+ }
+
+ if (rc == pcmk_ok) {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ tengine_stonith_connection_destroy);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_FENCE,
+ handle_fence_notification);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_HISTORY_SYNCED,
+ tengine_stonith_history_synced);
+ te_trigger_stonith_history_sync(TRUE);
+ crm_notice("Fencer successfully connected");
+ }
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Schedule fencer connection attempt in main loop
+ */
+void
+controld_trigger_fencer_connect(void)
+{
+ if (stonith_reconnect == NULL) {
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
+ te_connect_stonith,
+ GINT_TO_POINTER(TRUE));
+ }
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
+ mainloop_set_trigger(stonith_reconnect);
+}
+
+void
+controld_disconnect_fencer(bool destroy)
+{
+ if (stonith_api) {
+ // Prevent fencer connection from coming up again
+ controld_clear_fsa_input_flags(R_ST_REQUIRED);
+
+ if (stonith_api->state != stonith_disconnected) {
+ stonith_api->cmds->disconnect(stonith_api);
+ }
+ stonith_api->cmds->remove_notification(stonith_api, NULL);
+ }
+ if (destroy) {
+ if (stonith_api) {
+ stonith_api->cmds->free(stonith_api);
+ stonith_api = NULL;
+ }
+ if (stonith_reconnect) {
+ mainloop_destroy_trigger(stonith_reconnect);
+ stonith_reconnect = NULL;
+ }
+ if (te_client_id) {
+ free(te_client_id);
+ te_client_id = NULL;
+ }
+ }
+}
+
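+/*!
+ * \internal
+ * \brief Ask the local fencer to broadcast its history (the actual sync)
+ *
+ * \param[in] user_data Ignored
+ *
+ * \return TRUE if the fencer was connected and a broadcast was requested,
+ *         otherwise FALSE
+ */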
+static gboolean
+do_stonith_history_sync(gpointer user_data)
+{
+ if (stonith_api && (stonith_api->state != stonith_disconnected)) {
+ stonith_history_t *history = NULL;
+
+ te_cleanup_stonith_history_sync(stonith_api, FALSE);
+ stonith_api->cmds->history(stonith_api,
+ st_opt_sync_call | st_opt_broadcast,
+ NULL, &history, 5);
+ stonith_history_free(history);
+ return TRUE;
+ } else {
+ crm_info("Skip triggering stonith history-sync as stonith is disconnected");
+ return FALSE;
+ }
+}
+
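+/*!
+ * \internal
+ * \brief Handle the result of an asynchronous fencing request
+ *
+ * On the DC, update the transition graph (confirming or failing the fencing
+ * action); on other nodes, just log the result.
+ *
+ * \param[in] stonith Fencing API connection (unused)
+ * \param[in] data    Fencing API result, with the transition key as user data
+ */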
+static void
+tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+{
+ char *uuid = NULL;
+ int stonith_id = -1;
+ int transition_id = -1;
+ pcmk__graph_action_t *action = NULL;
+ const char *target = NULL;
+
+ if ((data == NULL) || (data->userdata == NULL)) {
+ crm_err("Ignoring fence operation %d result: "
+ "No transition key given (bug?)",
+ ((data == NULL)? -1 : data->call_id));
+ return;
+ }
+
+ if (!AM_I_DC) {
+ const char *reason = stonith__exit_reason(data);
+
+ if (reason == NULL) {
+ reason = pcmk_exec_status_str(stonith__execution_status(data));
+ }
+ crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
+ data->call_id, stonith__exit_status(data), reason,
+ (const char *) data->userdata);
+ return;
+ }
+
+ CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
+ &stonith_id, NULL),
+ goto bail);
+
+ if (controld_globals.transition_graph->complete || (stonith_id < 0)
+ || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
+ || (controld_globals.transition_graph->id != transition_id)) {
+ crm_info("Ignoring fence operation %d result: "
+ "Not from current transition " CRM_XS
+ " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
+ data->call_id,
+ pcmk__btoa(controld_globals.transition_graph->complete),
+ stonith_id, uuid, controld_globals.te_uuid, transition_id,
+ controld_globals.transition_graph->id);
+ goto bail;
+ }
+
+ action = controld_get_action(stonith_id);
+ if (action == NULL) {
+ crm_err("Ignoring fence operation %d result: "
+ "Action %d not found in transition graph (bug?) "
+ CRM_XS " uuid=%s transition=%d",
+ data->call_id, stonith_id, uuid, transition_id);
+ goto bail;
+ }
+
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ if (target == NULL) {
+ crm_err("Ignoring fence operation %d result: No target given (bug?)",
+ data->call_id);
+ goto bail;
+ }
+
+ stop_te_timer(action);
+ if (stonith__exit_status(data) == CRM_EX_OK) {
+ const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ const char *op = crm_meta_value(action->params, "stonith_action");
+
+ crm_info("Fence operation %d for %s succeeded", data->call_id, target);
+ if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
+ te_action_confirmed(action, NULL);
+ if (pcmk__str_eq("on", op, pcmk__str_casei)) {
+ const char *value = NULL;
+ char *now = pcmk__ttoa(time(NULL));
+ gboolean is_remote_node = FALSE;
+
+ /* This check is not 100% reliable, since this node is not
+ * guaranteed to have the remote node cached. However, it
+ * doesn't have to be reliable, since the attribute manager can
+ * learn a node's "remoteness" by other means sooner or later.
+ * This allows it to learn more quickly if this node does have
+ * the information.
+ */
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
+ is_remote_node = TRUE;
+ }
+
+ update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
+ is_remote_node);
+ free(now);
+
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
+ update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
+ is_remote_node);
+
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
+ update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
+ is_remote_node);
+
+ } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
+ send_stonith_update(action, target, uuid);
+ pcmk__set_graph_action_flags(action,
+ pcmk__graph_action_sent_update);
+ }
+ }
+ st_fail_count_reset(target);
+
+ } else {
+ enum pcmk__graph_next abort_action = pcmk__graph_restart;
+ int status = stonith__execution_status(data);
+ const char *reason = stonith__exit_reason(data);
+
+ if (reason == NULL) {
+ if (status == PCMK_EXEC_DONE) {
+ reason = "Agent returned error";
+ } else {
+ reason = pcmk_exec_status_str(status);
+ }
+ }
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+
+ /* If no fence devices were available, there's no use in immediately
+ * checking again, so don't start a new transition in that case.
+ */
+ if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
+ crm_warn("Fence operation %d for %s failed: %s "
+ "(aborting transition and giving up for now)",
+ data->call_id, target, reason);
+ abort_action = pcmk__graph_wait;
+ } else {
+ crm_notice("Fence operation %d for %s failed: %s "
+ "(aborting transition)", data->call_id, target, reason);
+ }
+
+ /* Increment the fail count now, so abort_for_stonith_failure() can
+ * check it. Non-DC nodes will increment it in
+ * handle_fence_notification().
+ */
+ st_fail_count_increment(target);
+ abort_for_stonith_failure(abort_action, target, NULL);
+ }
+
+ pcmk__update_graph(controld_globals.transition_graph, action);
+ trigger_graph();
+
+ bail:
+ free(data->userdata);
+ free(uuid);
+ return;
+}
+
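+/*!
+ * \internal
+ * \brief Request fencing of a node, with a requested fencing delay
+ *
+ * \param[in] target Name of node to fence
+ * \param[in] type   Fencing action to request ("reboot", "off", etc.)
+ * \param[in] delay  Requested fencing delay in seconds
+ *
+ * \return Result of the fencer API request (a call ID on success), which the
+ *         caller passes when registering the result callback
+ */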
+static int
+fence_with_delay(const char *target, const char *type, int delay)
+{
+ uint32_t options = st_opt_none; // Group of enum stonith_call_options
+ int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
+ / 1000);
+
+ if (crmd_join_phase_count(crm_join_confirmed) == 1) {
+ stonith__set_call_options(options, target, st_opt_allow_suicide);
+ }
+ return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
+ type, timeout_sec, 0, delay);
+}
+
+/*!
+ * \internal
+ * \brief Execute a fencing action from a transition graph
+ *
+ * \param[in] graph Transition graph being executed (ignored)
+ * \param[in] action Fencing action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_execute_fence_action(pcmk__graph_t *graph,
+ pcmk__graph_action_t *action)
+{
+ int rc = 0;
+ const char *id = ID(action->xml);
+ const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *type = crm_meta_value(action->params, "stonith_action");
+ char *transition_key = NULL;
+ const char *priority_delay = NULL;
+ int delay_i = 0;
+ gboolean invalid_action = FALSE;
+ int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
+ / 1000);
+
+ CRM_CHECK(id != NULL, invalid_action = TRUE);
+ CRM_CHECK(uuid != NULL, invalid_action = TRUE);
+ CRM_CHECK(type != NULL, invalid_action = TRUE);
+ CRM_CHECK(target != NULL, invalid_action = TRUE);
+
+ if (invalid_action) {
+ crm_log_xml_warn(action->xml, "BadAction");
+ return EPROTO;
+ }
+
+ priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
+
+ crm_notice("Requesting fencing (%s) targeting node %s "
+ CRM_XS " action=%s timeout=%i%s%s",
+ type, target, id, stonith_timeout,
+ priority_delay ? " priority_delay=" : "",
+ priority_delay ? priority_delay : "");
+
+ /* Passing NULL means block until we can connect... */
+ te_connect_stonith(NULL);
+
+ pcmk__scan_min_int(priority_delay, &delay_i, 0);
+ rc = fence_with_delay(target, type, delay_i);
+ transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, 0,
+ controld_globals.te_uuid);
+ stonith_api->cmds->register_callback(stonith_api, rc,
+ (stonith_timeout
+ + (delay_i > 0 ? delay_i : 0)),
+ st_opt_timeout_updates, transition_key,
+ "tengine_stonith_callback",
+ tengine_stonith_callback);
+ return pcmk_rc_ok;
+}
+
+bool
+controld_verify_stonith_watchdog_timeout(const char *value)
+{
+ const char *our_nodename = controld_globals.our_nodename;
+ gboolean rv = TRUE;
+
+ if (stonith_api && (stonith_api->state != stonith_disconnected) &&
+ stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
+ our_nodename)) {
+ rv = pcmk__valid_sbd_timeout(value);
+ }
+ return rv;
+}
+
+/* end stonith API client functions */
+
+
+/*
+ * stonith history synchronization
+ *
+ * Each node's fencer keeps track of a cluster-wide fencing history. When a node
+ * joins or leaves, we need to synchronize the history across all nodes.
+ */
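+
+/* How it works (per the code below): te_trigger_stonith_history_sync() arms
+ * a short (5s) or long (30s) timer; when the timer pops, it sets a mainloop
+ * trigger that runs do_stonith_history_sync(), which asks the local fencer
+ * to broadcast its history. A T_STONITH_NOTIFY_HISTORY_SYNCED notification
+ * then cancels any remaining timers.
+ */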
+
+static crm_trigger_t *stonith_history_sync_trigger = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
+
+void
+te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
+{
+ if (free_timers) {
+ mainloop_timer_del(stonith_history_sync_timer_short);
+ stonith_history_sync_timer_short = NULL;
+ mainloop_timer_del(stonith_history_sync_timer_long);
+ stonith_history_sync_timer_long = NULL;
+ } else {
+ mainloop_timer_stop(stonith_history_sync_timer_short);
+ mainloop_timer_stop(stonith_history_sync_timer_long);
+ }
+
+ if (st) {
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
+ }
+}
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
+{
+ te_cleanup_stonith_history_sync(st, FALSE);
+ crm_debug("Fence-history synced - cancel all timers");
+}
+
+static gboolean
+stonith_history_sync_set_trigger(gpointer user_data)
+{
+ mainloop_set_trigger(stonith_history_sync_trigger);
+ return FALSE;
+}
+
+void
+te_trigger_stonith_history_sync(bool long_timeout)
+{
+ /* Trigger a sync in 5s to give more nodes a chance to show up, so that
+ * we don't create unnecessary stonith-history-sync traffic.
+ *
+ * The long timeout of 30s is a fallback: after a successful connection to
+ * fenced, we wait up to 30s for the DC to trigger a history-sync. If that
+ * doesn't happen (e.g. fenced segfaulted and was restarted by pacemakerd),
+ * we trigger a sync locally.
+ */
+
+ /* Since do_stonith_history_sync() checks the fencer connection anyway,
+ * it is safe to leave the sync timers and stonith_history_sync_trigger
+ * around.
+ */
+ if (stonith_history_sync_trigger == NULL) {
+ stonith_history_sync_trigger =
+ mainloop_add_trigger(G_PRIORITY_LOW,
+ do_stonith_history_sync, NULL);
+ }
+
+ if (long_timeout) {
+ if (stonith_history_sync_timer_long == NULL) {
+ stonith_history_sync_timer_long =
+ mainloop_timer_add("history_sync_long", 30000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_long);
+ } else {
+ if (stonith_history_sync_timer_short == NULL) {
+ stonith_history_sync_timer_short =
+ mainloop_timer_add("history_sync_short", 5000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_short);
+ }
+}
+
+/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
new file mode 100644
index 0000000..86a5050
--- /dev/null
+++ b/daemons/controld/controld_fencing.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_FENCING__H
+# define CONTROLD_FENCING__H
+
+#include <stdbool.h> // bool
+#include <pacemaker-internal.h> // pcmk__graph_t, pcmk__graph_action_t
+
+void controld_configure_fencing(GHashTable *options);
+
+// stonith fail counts
+void st_fail_count_reset(const char *target);
+
+// stonith API client
+void controld_trigger_fencer_connect(void);
+void controld_disconnect_fencer(bool destroy);
+int controld_execute_fence_action(pcmk__graph_t *graph,
+ pcmk__graph_action_t *action);
+bool controld_verify_stonith_watchdog_timeout(const char *value);
+
+// stonith cleanup list
+void add_stonith_cleanup(const char *target);
+void remove_stonith_cleanup(const char *target);
+void purge_stonith_cleanup(void);
+void execute_stonith_cleanup(void);
+
+// stonith history synchronization
+void te_trigger_stonith_history_sync(bool long_timeout);
+void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers);
+
+#endif
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
new file mode 100644
index 0000000..622d1c8
--- /dev/null
+++ b/daemons/controld/controld_fsa.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <stdint.h> // uint64_t
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+//! Triggers an FSA invocation
+static crm_trigger_t *fsa_trigger = NULL;
+
+#define DOT_PREFIX "actions:trace: "
+#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
+
+static void do_state_transition(enum crmd_fsa_state cur_state,
+ enum crmd_fsa_state next_state,
+ fsa_data_t *msg_data);
+
+void s_crmd_fsa_actions(fsa_data_t * fsa_data);
+void log_fsa_input(fsa_data_t * stored_msg);
+void init_dotfile(void);
+
+void
+init_dotfile(void)
+{
+ do_dot_log(DOT_PREFIX "digraph \"g\" {");
+ do_dot_log(DOT_PREFIX " size = \"30,30\"");
+ do_dot_log(DOT_PREFIX " graph [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " node [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " edge [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX "// special nodes");
+ do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
+ do_dot_log(DOT_PREFIX " [");
+ do_dot_log(DOT_PREFIX " color = \"blue\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
+ do_dot_log(DOT_PREFIX " [");
+ do_dot_log(DOT_PREFIX " color = \"red\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX "// DC only nodes");
+ do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
+}
+
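+/*!
+ * \internal
+ * \brief Clear an action flag, then execute the corresponding FSA action
+ *
+ * \param[in] fsa_data  Input message being processed
+ * \param[in] an_action Single action flag to clear and execute
+ * \param[in] function  Handler function implementing the action
+ */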
+static void
+do_fsa_action(fsa_data_t * fsa_data, long long an_action,
+ void (*function) (long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
+{
+ controld_clear_fsa_action_flags(an_action);
+ crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));
+ function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state,
+ fsa_data->fsa_input, fsa_data);
+}
+
+static const uint64_t startup_actions =
+ A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG |
+ A_STARTED | A_CL_JOIN_QUERY;
+
+// A_LOG, A_WARN, A_ERROR
+void
+do_log(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data)
+{
+ unsigned log_type = LOG_TRACE;
+
+ if (action & A_LOG) {
+ log_type = LOG_INFO;
+ } else if (action & A_WARN) {
+ log_type = LOG_WARNING;
+ } else if (action & A_ERROR) {
+ log_type = LOG_ERR;
+ }
+
+ do_crm_log(log_type, "Input %s received in state %s from %s",
+ fsa_input2string(msg_data->fsa_input),
+ fsa_state2string(cur_state), msg_data->origin);
+
+ if (msg_data->data_type == fsa_dt_ha_msg) {
+ ha_msg_input_t *input = fsa_typed_data(msg_data->data_type);
+
+ crm_log_xml_debug(input->msg, __func__);
+
+ } else if (msg_data->data_type == fsa_dt_xml) {
+ xmlNode *input = fsa_typed_data(msg_data->data_type);
+
+ crm_log_xml_debug(input, __func__);
+
+ } else if (msg_data->data_type == fsa_dt_lrm) {
+ lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type);
+
+ do_crm_log(log_type,
+ "Resource %s: Call ID %d returned %d (%d)."
+ " New status if rc=0: %s",
+ input->rsc_id, input->call_id, input->rc,
+ input->op_status, (char *)input->user_data);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Initialize the FSA trigger
+ */
+void
+controld_init_fsa_trigger(void)
+{
+ fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Destroy the FSA trigger
+ */
+void
+controld_destroy_fsa_trigger(void)
+{
+ // This won't actually free the trigger, since the mainloop still holds a reference to it
+ mainloop_destroy_trigger(fsa_trigger);
+ fsa_trigger = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Trigger an FSA invocation
+ *
+ * \param[in] fn Calling function name
+ * \param[in] line Line number where call occurred
+ */
+void
+controld_trigger_fsa_as(const char *fn, int line)
+{
+ if (fsa_trigger != NULL) {
+ crm_trace("%s:%d - Triggered FSA invocation", fn, line);
+ mainloop_set_trigger(fsa_trigger);
+ }
+}
+
+enum crmd_fsa_state
+s_crmd_fsa(enum crmd_fsa_cause cause)
+{
+ controld_globals_t *globals = &controld_globals;
+ fsa_data_t *fsa_data = NULL;
+ uint64_t register_copy = controld_globals.fsa_input_register;
+ uint64_t new_actions = A_NOTHING;
+ enum crmd_fsa_state last_state;
+
+ crm_trace("FSA invoked with Cause: %s\tState: %s",
+ fsa_cause2string(cause),
+ fsa_state2string(globals->fsa_state));
+
+ fsa_dump_actions(controld_globals.fsa_actions, "Initial");
+
+ controld_clear_global_flags(controld_fsa_is_stalled);
+ if ((controld_globals.fsa_message_queue == NULL)
+ && (controld_globals.fsa_actions != A_NOTHING)) {
+ /* fake the first message so we can get into the loop */
+ fsa_data = calloc(1, sizeof(fsa_data_t));
+ fsa_data->fsa_input = I_NULL;
+ fsa_data->fsa_cause = C_FSA_INTERNAL;
+ fsa_data->origin = __func__;
+ fsa_data->data_type = fsa_dt_none;
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
+ fsa_data = NULL;
+ }
+ while ((controld_globals.fsa_message_queue != NULL)
+ && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+ crm_trace("Checking messages (%d remaining)",
+ g_list_length(controld_globals.fsa_message_queue));
+
+ fsa_data = get_message();
+ if (fsa_data == NULL) {
+ continue;
+ }
+
+ log_fsa_input(fsa_data);
+
+ /* add any actions back to the queue */
+ controld_set_fsa_action_flags(fsa_data->actions);
+ fsa_dump_actions(fsa_data->actions, "Restored actions");
+
+ /* get the next batch of actions */
+ new_actions = controld_fsa_get_action(fsa_data->fsa_input);
+ controld_set_fsa_action_flags(new_actions);
+ fsa_dump_actions(new_actions, "New actions");
+
+ if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
+ crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
+ fsa_input2string(fsa_data->fsa_input),
+ fsa_state2string(globals->fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ }
+
+ /* logging : *before* the state is changed */
+ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
+ do_fsa_action(fsa_data, A_ERROR, do_log);
+ }
+ if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
+ do_fsa_action(fsa_data, A_WARN, do_log);
+ }
+ if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
+ do_fsa_action(fsa_data, A_LOG, do_log);
+ }
+
+ /* update state variables */
+ last_state = globals->fsa_state;
+ globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input);
+
+ /*
+ * Remove certain actions during shutdown
+ */
+ if ((globals->fsa_state == S_STOPPING)
+ || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ controld_clear_fsa_action_flags(startup_actions);
+ }
+
+ /*
+ * Hook for change of state.
+ * Allows actions to be added or removed when entering a state
+ */
+ if (last_state != globals->fsa_state) {
+ do_state_transition(last_state, globals->fsa_state, fsa_data);
+ } else {
+ do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
+ " \tInput=%s \tOrigin=%s() \tid=%d",
+ fsa_state2string(globals->fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause),
+ fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
+ }
+
+ /* start doing things... */
+ s_crmd_fsa_actions(fsa_data);
+ delete_fsa_input(fsa_data);
+ fsa_data = NULL;
+ }
+
+ if ((controld_globals.fsa_message_queue != NULL)
+ || (controld_globals.fsa_actions != A_NOTHING)
+ || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+
+ crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s",
+ g_list_length(controld_globals.fsa_message_queue),
+ (unsigned long long) controld_globals.fsa_actions,
+ pcmk__btoa(pcmk_is_set(controld_globals.flags,
+ controld_fsa_is_stalled)));
+ } else {
+ crm_trace("Exiting the FSA");
+ }
+
+ /* cleanup inputs? */
+ if (register_copy != controld_globals.fsa_input_register) {
+ uint64_t same = register_copy & controld_globals.fsa_input_register;
+
+ fsa_dump_inputs(LOG_DEBUG, "Added",
+ controld_globals.fsa_input_register ^ same);
+ fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
+ }
+
+ fsa_dump_actions(controld_globals.fsa_actions, "Remaining");
+ fsa_dump_queue(LOG_DEBUG);
+
+ return globals->fsa_state;
+}
+
+void
+s_crmd_fsa_actions(fsa_data_t * fsa_data)
+{
+ /*
+ * Process actions in order of priority but do only one
+ * action at a time to avoid complicating the ordering.
+ */
+ CRM_CHECK(fsa_data != NULL, return);
+ while ((controld_globals.fsa_actions != A_NOTHING)
+ && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+
+ /* regular action processing in order of action priority
+ *
+ * Make sure all actions that connect to required systems
+ * are performed first
+ */
+ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
+ do_fsa_action(fsa_data, A_ERROR, do_log);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
+ do_fsa_action(fsa_data, A_WARN, do_log);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
+ do_fsa_action(fsa_data, A_LOG, do_log);
+
+ /* get out of here NOW! before anything worse happens */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) {
+ do_fsa_action(fsa_data, A_EXIT_1, do_exit);
+
+ /* sub-system restart */
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_LRM_RECONNECT)) {
+ do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_CIB_RESTART)) {
+ do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_PE_RESTART)) {
+ do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_TE_RESTART)) {
+ do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
+
+ /* essential start tasks */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) {
+ do_fsa_action(fsa_data, A_STARTUP, do_startup);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) {
+ do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) {
+ do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) {
+ do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
+
+ /* sub-system start/connect */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) {
+ do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) {
+ do_fsa_action(fsa_data, A_TE_START, do_te_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) {
+ do_fsa_action(fsa_data, A_PE_START, do_pe_control);
+
+ /* Timers */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_INTEGRATE_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_INTEGRATE_TIMER_START)) {
+ do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_FINALIZE_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_FINALIZE_TIMER_START)) {
+ do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
+
+ /*
+ * Highest priority actions
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) {
+ do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) {
+ do_fsa_action(fsa_data, A_RECOVER, do_recover);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_RESULT)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_RESULT,
+ do_cl_join_finalize_respond);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_REQUEST)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_REQUEST,
+ do_cl_join_offer_respond);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) {
+ do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) {
+ do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_COUNT)) {
+ do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) {
+ do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
+
+ /*
+ * High priority actions
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) {
+ do_fsa_action(fsa_data, A_STARTED, do_started);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_TIMER_START)) {
+ do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
+
+ /*
+ * Medium priority actions
+ * - Membership
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) {
+ do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) {
+ do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_CHECK)) {
+ do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_START)) {
+ do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_OFFER_ALL)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_OFFER_ONE)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_PROCESS_REQ)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ,
+ do_dc_join_filter_offer);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_PROCESS_ACK)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_FINALIZE)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_ANNOUNCE)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
+
+ /*
+ * Low(er) priority actions
+ * Make sure the CIB is always updated before invoking the
+ * scheduler, and the scheduler before the transition engine.
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) {
+ do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) {
+ do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) {
+ do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) {
+ do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) {
+ do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
+
+ /* Shutdown actions */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) {
+ do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) {
+ do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) {
+ do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) {
+ do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_LRM_DISCONNECT)) {
+ do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) {
+ do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) {
+ do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) {
+ do_fsa_action(fsa_data, A_STOP, do_stop);
+
+ /* exit gracefully */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) {
+ do_fsa_action(fsa_data, A_EXIT_0, do_exit);
+
+ /* Error checking and reporting */
+ } else {
+ crm_err("Action %s not supported "CRM_XS" %#llx",
+ fsa_action2string(controld_globals.fsa_actions),
+ (unsigned long long) controld_globals.fsa_actions);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL,
+ __func__);
+ }
+ }
+}
+
+void
+log_fsa_input(fsa_data_t * stored_msg)
+{
+ CRM_ASSERT(stored_msg);
+ crm_trace("Processing queued input %d", stored_msg->id);
+ if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
+ crm_trace("FSA processing LRM callback from %s", stored_msg->origin);
+
+ } else if (stored_msg->data == NULL) {
+ crm_trace("FSA processing input from %s", stored_msg->origin);
+
+ } else {
+ ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg,
+ __func__);
+
+ crm_trace("FSA processing XML message from %s", stored_msg->origin);
+ crm_log_xml_trace(ha_input->xml, "FSA message data");
+ }
+}
+
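+/*!
+ * \internal
+ * \brief Check join counts against current membership after a join round
+ *
+ * Log the result, and register a new election or join as an FSA input if the
+ * counts and membership do not line up.
+ *
+ * \param[in] msg_data Current FSA input
+ */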
+static void
+check_join_counts(fsa_data_t *msg_data)
+{
+ int count;
+ guint npeers;
+
+ count = crmd_join_phase_count(crm_join_finalized);
+ if (count > 0) {
+ crm_err("%d cluster node%s failed to confirm join",
+ count, pcmk__plural_s(count));
+ crmd_join_phase_log(LOG_NOTICE);
+ return;
+ }
+
+ npeers = crm_active_peers();
+ count = crmd_join_phase_count(crm_join_confirmed);
+ if (count == npeers) {
+ if (npeers == 1) {
+ crm_debug("Sole active cluster node is fully joined");
+ } else {
+ crm_debug("All %d active cluster nodes are fully joined", count);
+ }
+
+ } else if (count > npeers) {
+ crm_err("New election needed because more nodes confirmed join "
+ "than are in membership (%d > %u)", count, npeers);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+
+ } else if (controld_globals.membership_id != crm_peer_seq) {
+ crm_info("New join needed because membership changed (%llu -> %llu)",
+ controld_globals.membership_id, crm_peer_seq);
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+
+ } else {
+ crm_warn("Only %d of %u active cluster nodes fully joined "
+ "(%d did not respond to offer)",
+ count, npeers, crmd_join_phase_count(crm_join_welcomed));
+ }
+}
+
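+/*!
+ * \internal
+ * \brief Perform per-state bookkeeping when the FSA changes state
+ *
+ * Adjust timers, action flags, and the stonith cleanup list as appropriate
+ * for the state being entered.
+ *
+ * \param[in] cur_state  State being left
+ * \param[in] next_state State being entered
+ * \param[in] msg_data   FSA input that triggered the transition
+ */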
+static void
+do_state_transition(enum crmd_fsa_state cur_state,
+ enum crmd_fsa_state next_state, fsa_data_t *msg_data)
+{
+ int level = LOG_INFO;
+ int count = 0;
+ gboolean clear_recovery_bit = TRUE;
+#if 0
+ uint64_t original_fsa_actions = controld_globals.fsa_actions;
+#endif
+
+ enum crmd_fsa_cause cause = msg_data->fsa_cause;
+ enum crmd_fsa_input current_input = msg_data->fsa_input;
+
+ const char *state_from = fsa_state2string(cur_state);
+ const char *state_to = fsa_state2string(next_state);
+ const char *input = fsa_input2string(current_input);
+
+ CRM_LOG_ASSERT(cur_state != next_state);
+
+ do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
+ state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
+
+ if (cur_state == S_IDLE || next_state == S_IDLE) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_ELECTION) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_STARTING) {
+ level = LOG_NOTICE;
+ } else if (next_state == S_RECOVERY) {
+ level = LOG_WARNING;
+ }
+
+ do_crm_log(level, "State transition %s -> %s "
+ CRM_XS " input=%s cause=%s origin=%s",
+ state_from, state_to, input, fsa_cause2string(cause),
+ msg_data->origin);
+
+ if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
+ controld_stop_current_election_timeout();
+ }
+#if 0
+ if ((controld_globals.fsa_input_register & R_SHUTDOWN)) {
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ }
+#endif
+ if (next_state == S_INTEGRATION) {
+ controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START);
+ } else {
+ controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP);
+ }
+
+ if (next_state == S_FINALIZE_JOIN) {
+ controld_set_fsa_action_flags(A_FINALIZE_TIMER_START);
+ } else {
+ controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP);
+ }
+
+ if (next_state != S_PENDING) {
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ }
+ if (next_state != S_IDLE) {
+ controld_stop_recheck_timer();
+ }
+
+ if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
+ populate_cib_nodes(node_update_quick|node_update_all, __func__);
+ }
+
+ switch (next_state) {
+ case S_PENDING:
+ {
+ cib_t *cib_conn = controld_globals.cib_conn;
+ cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
+ }
+ update_dc(NULL);
+ break;
+
+ case S_ELECTION:
+ update_dc(NULL);
+ break;
+
+ case S_NOT_DC:
+ controld_reset_counter_election_timer();
+ purge_stonith_cleanup();
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
+ controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
+ }
+ CRM_LOG_ASSERT(controld_globals.dc_name != NULL);
+ if (controld_globals.dc_name == NULL) {
+ crm_err("Reached S_NOT_DC without a DC" " being recorded");
+ }
+ break;
+
+ case S_RECOVERY:
+ clear_recovery_bit = FALSE;
+ break;
+
+ case S_FINALIZE_JOIN:
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (cause == C_TIMER_POPPED) {
+ crm_warn("Progressed to state %s after %s",
+ fsa_state2string(next_state), fsa_cause2string(cause));
+ }
+ count = crmd_join_phase_count(crm_join_welcomed);
+ if (count > 0) {
+ crm_warn("%d cluster node%s failed to respond to join offer",
+ count, pcmk__plural_s(count));
+ crmd_join_phase_log(LOG_NOTICE);
+
+ } else {
+ crm_debug("All cluster nodes (%d) responded to join offer",
+ crmd_join_phase_count(crm_join_integrated));
+ }
+ break;
+
+ case S_POLICY_ENGINE:
+ controld_reset_counter_election_timer();
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (cause == C_TIMER_POPPED) {
+ crm_info("Progressed to state %s after %s",
+ fsa_state2string(next_state), fsa_cause2string(cause));
+ }
+ check_join_counts(msg_data);
+ break;
+
+ case S_STOPPING:
+ case S_TERMINATE:
+ /* possibly redundant */
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ break;
+
+ case S_IDLE:
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("(Re)Issuing shutdown request now" " that we are the DC");
+ controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
+ }
+ controld_start_recheck_timer();
+ break;
+
+ default:
+ break;
+ }
+
+ if (clear_recovery_bit && next_state != S_PENDING) {
+ controld_clear_fsa_action_flags(A_RECOVER);
+ } else if (clear_recovery_bit == FALSE) {
+ controld_set_fsa_action_flags(A_RECOVER);
+ }
+
+#if 0
+ if (original_fsa_actions != controld_globals.fsa_actions) {
+ fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions,
+ "New actions");
+ }
+#endif
+}
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
new file mode 100644
index 0000000..2b79f07
--- /dev/null
+++ b/daemons/controld/controld_fsa.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_FSA__H
+# define CRMD_FSA__H
+
+# include <crm/crm.h>
+# include <crm/cib.h>
+# include <crm/common/xml.h>
+# include <crm/common/mainloop.h>
+# include <crm/cluster.h>
+# include <crm/cluster/election_internal.h>
+# include <crm/common/ipc_internal.h>
+
+/*! States the controller can be in */
+enum crmd_fsa_state {
+ S_IDLE = 0, /* Nothing happening */
+
+ S_ELECTION, /* Take part in the election algorithm as
+ * described below
+ */
+ S_INTEGRATION, /* integrate the status of new nodes (which is
+ * all of them if we have just been elected DC)
+ * to form a complete and up-to-date picture of
+ * the CIB
+ */
+ S_FINALIZE_JOIN, /* finalize the join process by sending the
+ * updated CIB out to the newly joined nodes
+ * and waiting for their confirmation
+ */
+ S_NOT_DC, /* we are in non-DC mode */
+ S_POLICY_ENGINE, /* Determine next stable state of the cluster */
+ S_RECOVERY, /* Something bad happened, check everything is ok
+ * before continuing and attempt to recover if
+ * required
+ */
+ S_RELEASE_DC, /* we were the DC, but now we aren't anymore,
+ * possibly by our own request, and we should
+ * release all unnecessary sub-systems, finish
+ * any pending actions, do general cleanup and
+ * unset anything that makes us think we are
+ * special :)
+ */
+ S_STARTING, /* we are just starting out */
+ S_PENDING, /* we are not a full/active member yet */
+ S_STOPPING, /* We are in the final stages of shutting down */
+ S_TERMINATE, /* We are going to shutdown, this is the equiv of
+ * "Sending TERM signal to all processes" in Linux
+ * and in worst case scenarios could be considered
+ * a self STONITH
+ */
+ S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable
+ * state of the cluster a reality
+ */
+
+ S_HALT, /* Freeze - don't do anything
+ * Something bad happened that needs the admin to fix
+ * Wait for I_ELECTION
+ */
+
+ /* ----------- Last input found in table is above ---------- */
+ S_ILLEGAL /* This is an illegal FSA state */
+ /* (must be last) */
+};
+
+# define MAXSTATE S_ILLEGAL
+
+/*
+ Once we start and do some basic sanity checks, we go into the
+ S_NOT_DC state and await instructions from the DC or input from
+ the cluster layer which indicates the election algorithm needs to run.
+
+ If the election algorithm is triggered, we enter the S_ELECTION state
+ from where we can either go back to the S_NOT_DC state or progress
+ to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
+ but aren't anymore). See the libcrmcluster API documentation for more
+ information about the election algorithm.
+
+ Once the election is complete, if we are the DC, we enter the
+ S_INTEGRATION state which is a DC-in-waiting style state. We are
+ the DC, but we shouldn't do anything yet because we may not have an
+ up-to-date picture of the cluster. There may of course be times
+ when this fails, so we should go back to the S_RECOVERY stage and
+ check everything is ok. We may also end up here if a new node came
+ online, since each node is authoritative about itself, and we would want
+ to incorporate its information into the CIB.
+
+ Once we have the latest CIB, we enter the S_POLICY_ENGINE state, where
+ we invoke the scheduler. It is possible that, between invoking the
+ scheduler and receiving an answer, we receive more input. In that case,
+ we discard the original result and invoke the scheduler again.
+
+ Once we are satisfied with the output from the scheduler, we
+ enter S_TRANSITION_ENGINE and feed the scheduler's output to the
+ Transition Engine, which attempts to make the scheduler's
+ calculation a reality. If the transition completes successfully,
+ we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
+ current unstable state and try again.
+
+ Of course, we may be asked to shutdown at any time, however we must
+ progress to S_NOT_DC before doing so. Once we have handed over DC
+ duties to another node, we can then shut down like everyone else,
+ that is, by asking the DC for permission and waiting for it to take all
+ our resources away.
+
+ The case where we are the DC and the only node in the cluster is a
+ special case and handled as an escalation which takes us to
+ S_SHUTDOWN. Similarly, if any other point in the shutdown
+ fails or stalls, this is escalated and we end up in S_TERMINATE.
+
+ At any point, the controller can relay messages for its subsystems,
+ but outbound messages (from subsystems) should probably be blocked
+ until S_INTEGRATION (for the DC) or the join protocol has
+ completed (for non-DC controllers).
+*/
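+
+/* Simplified state flow (illustrative summary of the above, not exhaustive):
+ *
+ *   S_STARTING -> S_PENDING -> S_NOT_DC
+ *   S_NOT_DC -> S_ELECTION -> S_NOT_DC (lost) or S_INTEGRATION (won)
+ *   S_INTEGRATION -> S_FINALIZE_JOIN -> S_POLICY_ENGINE
+ *   S_POLICY_ENGINE -> S_TRANSITION_ENGINE -> S_IDLE (success)
+ *   S_TRANSITION_ENGINE -> S_POLICY_ENGINE (failed transition)
+ */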
+
+/*======================================
+ *
+ * Inputs/Events/Stimuli to be given to the finite state machine
+ *
+ * Some of these are true events, and others are synthesised based on
+ * the "register" (see below) and the contents or source of messages.
+ *
+ * The machine keeps processing until receiving I_NULL
+ *
+ *======================================*/
+enum crmd_fsa_input {
+/* 0 */
+ I_NULL, /* Nothing happened */
+/* 1 */
+
+ I_CIB_OP, /* An update to the CIB occurred */
+ I_CIB_UPDATE, /* An update to the CIB occurred */
+ I_DC_TIMEOUT, /* We have lost communication with the DC */
+ I_ELECTION, /* Someone started an election */
+ I_PE_CALC, /* The scheduler needs to be invoked */
+ I_RELEASE_DC, /* The election completed and we were not
+ * elected, but we were the DC beforehand
+ */
+ I_ELECTION_DC, /* The election completed and we were (re-)elected
+ * DC
+ */
+ I_ERROR, /* Something bad happened (more serious than
+ * I_FAIL) and may not have been due to the action
+ * being performed. For example, we may have lost
+ * our connection to the CIB.
+ */
+/* 9 */
+ I_FAIL, /* The action failed to complete successfully */
+ I_INTEGRATED,
+ I_FINALIZED,
+ I_NODE_JOIN, /* A node has entered the cluster */
+ I_NOT_DC, /* We are not and were not the DC before or after
+ * the current operation or state
+ */
+ I_RECOVERED, /* The recovery process completed successfully */
+ I_RELEASE_FAIL, /* We could not give up DC status for some reason
+ */
+ I_RELEASE_SUCCESS, /* We are no longer the DC */
+ I_RESTART, /* The current set of actions needs to be
+ * restarted
+ */
+ I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action
+ * is required of us, e.g. ping
+ */
+/* 20 */
+ I_ROUTER, /* Do our job as router and forward this to the
+ * right place
+ */
+ I_SHUTDOWN, /* We are asking to shutdown */
+ I_STOP, /* We have been told to shutdown */
+ I_TERMINATE, /* Actually exit */
+ I_STARTUP,
+ I_PE_SUCCESS, /* The action completed successfully */
+
+ I_JOIN_OFFER, /* The DC is offering membership */
+ I_JOIN_REQUEST, /* The client is requesting membership */
+ I_JOIN_RESULT, /* If not the DC: The result of a join request
+ * Else: A client is responding with its local state info
+ */
+
+ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen"
+ * and until it does, we can't do anything else
+ */
+
+ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */
+
+ I_LRM_EVENT,
+
+/* 30 */
+ I_PENDING,
+ I_HALT,
+
+ /* ------------ Last input found in table is above ----------- */
+ I_ILLEGAL /* This is an illegal value for an FSA input */
+ /* (must be last) */
+};
+
+# define MAXINPUT I_ILLEGAL
+
+# define I_MESSAGE I_ROUTER
+
+/*======================================
+ *
+ * actions
+ *
+ * Some of the actions below will always occur together for now, but this may
+ * not always be the case, so they are split up so that they can easily be
+ * called independently in the future, if necessary.
+ *
+ * For example, separating A_LRM_CONNECT from A_STARTUP might be useful
+ * if we ever try to recover from a faulty or disconnected executor.
+ *
+ *======================================*/
+
+ /* Don't do anything */
+# define A_NOTHING 0x0000000000000000ULL
+
+/* -- Startup actions -- */
+ /* Hook to perform any actions (other than connecting to other daemons)
+ * that might be needed as part of the startup.
+ */
+# define A_STARTUP 0x0000000000000001ULL
+ /* Hook to perform any actions that might be needed as part
+ * after startup is successful.
+ */
+# define A_STARTED 0x0000000000000002ULL
+ /* Connect to cluster layer */
+# define A_HA_CONNECT 0x0000000000000004ULL
+# define A_HA_DISCONNECT 0x0000000000000008ULL
+
+# define A_INTEGRATE_TIMER_START 0x0000000000000010ULL
+# define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL
+# define A_FINALIZE_TIMER_START 0x0000000000000040ULL
+# define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL
+
+/* -- Election actions -- */
+# define A_DC_TIMER_START 0x0000000000000100ULL
+# define A_DC_TIMER_STOP 0x0000000000000200ULL
+# define A_ELECTION_COUNT 0x0000000000000400ULL
+# define A_ELECTION_VOTE 0x0000000000000800ULL
+
+# define A_ELECTION_START 0x0000000000001000ULL
+
+/* -- Message processing -- */
+ /* Process the queue of requests */
+# define A_MSG_PROCESS 0x0000000000002000ULL
+ /* Send the message to the correct recipient */
+# define A_MSG_ROUTE 0x0000000000004000ULL
+
+ /* Send a welcome message to new node(s) */
+# define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL
+
+/* -- Server Join protocol actions -- */
+ /* Send a welcome message to all nodes */
+# define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL
+ /* Process a node's join request (its response to our offer) */
+# define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL
+ /* Send out the results of the Join phase */
+# define A_DC_JOIN_FINALIZE 0x0000000000040000ULL
+ /* Process a node's confirmation of the join results */
+# define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL
+
+/* -- Client Join protocol actions -- */
+# define A_CL_JOIN_QUERY 0x0000000000100000ULL
+# define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL
+ /* Request membership from the DC */
+# define A_CL_JOIN_REQUEST 0x0000000000400000ULL
+ /* Did the DC accept or reject the request? */
+# define A_CL_JOIN_RESULT 0x0000000000800000ULL
+
+/* -- Recovery, DC start/stop -- */
+ /* Something bad happened, try to recover */
+# define A_RECOVER 0x0000000001000000ULL
+ /* Hook to perform any actions (apart from stopping the TE and
+ * scheduler) that might be necessary before giving up the
+ * responsibilities of being the DC.
+ */
+# define A_DC_RELEASE 0x0000000002000000ULL
+ /* The responsibilities of being the DC have been released */
+# define A_DC_RELEASED 0x0000000004000000ULL
+ /* Hook to perform any actions (apart from starting the TE and
+ * scheduler, and gathering the latest CIB) that might be necessary
+ * before taking over the responsibilities of being the DC.
+ */
+# define A_DC_TAKEOVER 0x0000000008000000ULL
+
+/* -- Shutdown actions -- */
+# define A_SHUTDOWN 0x0000000010000000ULL
+# define A_STOP 0x0000000020000000ULL
+# define A_EXIT_0 0x0000000040000000ULL
+# define A_EXIT_1 0x0000000080000000ULL
+
+# define A_SHUTDOWN_REQ 0x0000000100000000ULL
+# define A_ELECTION_CHECK 0x0000000200000000ULL
+# define A_DC_JOIN_FINAL 0x0000000400000000ULL
+
+/* -- CIB actions -- */
+# define A_CIB_START 0x0000020000000000ULL
+# define A_CIB_STOP 0x0000040000000000ULL
+
+/* -- Transition Engine actions -- */
+ /* Attempt to reach the newly calculated cluster state. This is
+ * only called once per transition (except if it is asked to
+ * stop the transition or start a new one).
+ * Once given a cluster state to reach, the TE will determine
+ * tasks that can be performed in parallel, execute them, wait
+ * for replies, and then determine the next set until the new
+ * state is reached or no further actions can be performed.
+ */
+# define A_TE_INVOKE 0x0000100000000000ULL
+# define A_TE_START 0x0000200000000000ULL
+# define A_TE_STOP 0x0000400000000000ULL
+# define A_TE_CANCEL 0x0000800000000000ULL
+# define A_TE_HALT 0x0001000000000000ULL
+
+/* -- Scheduler actions -- */
+ /* Calculate the next state for the cluster. This is only
+ * invoked once per needed calculation.
+ */
+# define A_PE_INVOKE 0x0002000000000000ULL
+# define A_PE_START 0x0004000000000000ULL
+# define A_PE_STOP 0x0008000000000000ULL
+/* -- Misc actions -- */
+ /* Add a system-generated "block" so that resources aren't moved
+ * to, or are actively moved away from, the affected node. This
+ * way we can return quickly even if busy with other things.
+ */
+# define A_NODE_BLOCK 0x0010000000000000ULL
+ /* Update our information in the local CIB */
+# define A_UPDATE_NODESTATUS 0x0020000000000000ULL
+# define A_READCONFIG 0x0080000000000000ULL
+
+/* -- LRM Actions -- */
+ /* Connect to pacemaker-execd */
+# define A_LRM_CONNECT 0x0100000000000000ULL
+ /* Disconnect from pacemaker-execd */
+# define A_LRM_DISCONNECT 0x0200000000000000ULL
+# define A_LRM_INVOKE 0x0400000000000000ULL
+# define A_LRM_EVENT 0x0800000000000000ULL
+
+/* -- Logging actions -- */
+# define A_LOG 0x1000000000000000ULL
+# define A_ERROR 0x2000000000000000ULL
+# define A_WARN 0x4000000000000000ULL
+
+# define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
+# define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
+# define O_PE_RESTART (A_PE_START|A_PE_STOP)
+# define O_TE_RESTART (A_TE_START|A_TE_STOP)
+# define O_CIB_RESTART (A_CIB_START|A_CIB_STOP)
+# define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT)
+# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START)
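+
+/* Illustrative note: the O_* values are convenience groups of A_* flags. For
+ * example, requesting O_LRM_RECONNECT sets both A_LRM_DISCONNECT and
+ * A_LRM_CONNECT in one step:
+ *
+ *     controld_set_fsa_action_flags(O_LRM_RECONNECT);
+ */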
+/*======================================
+ *
+ * "register" contents
+ *
+ * Things we may want to remember regardless of which state we are in.
+ *
+ * These also count as inputs for synthesizing I_*
+ *
+ *======================================*/
+# define R_THE_DC 0x00000001ULL
+ /* Are we the DC? */
+# define R_STARTING 0x00000002ULL
+ /* Are we starting up? */
+# define R_SHUTDOWN 0x00000004ULL
+ /* Are we trying to shut down? */
+# define R_STAYDOWN 0x00000008ULL
+ /* Should we stay down (not respawn) after stopping? */
+
+# define R_JOIN_OK 0x00000010ULL /* Have we completed the join process? */
+# define R_READ_CONFIG 0x00000040ULL
+# define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked?
+
+# define R_CIB_CONNECTED 0x00000100ULL
+ /* Is the CIB connected? */
+# define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected?
+# define R_TE_CONNECTED 0x00000400ULL
+ /* Is the Transition Engine connected? */
+# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected?
+
+# define R_CIB_REQUIRED 0x00001000ULL
+ /* Is the CIB required? */
+# define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required?
+# define R_TE_REQUIRED 0x00004000ULL
+ /* Is the Transition Engine required? */
+# define R_ST_REQUIRED 0x00008000ULL
+ /* Is the Stonith daemon required? */
+
+# define R_CIB_DONE 0x00010000ULL
+ /* Have we calculated the CIB? */
+# define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB? */
+
+# define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet? */
+# define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet? */
+
+# define R_HA_DISCONNECTED 0x00400000ULL /* Did we sign out of our own accord? */
+
+# define R_REQ_PEND 0x01000000ULL
+ /* Are there Requests waiting for
+ processing? */
+# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler?
+# define R_TE_PEND 0x04000000ULL
+ /* Has the TE been invoked and we're
+ awaiting completion? */
+# define R_RESP_PEND 0x08000000ULL
+ /* Do we have clients waiting on a
+ response? If so, perhaps we shouldn't
+ stop yet */
+
+# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all
+ * resources in preparation for
+ * shutting down? */
+
+# define R_IN_RECOVERY 0x80000000ULL
+
+#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID)
+
+enum crmd_fsa_cause {
+ C_UNKNOWN = 0,
+ C_STARTUP,
+ C_IPC_MESSAGE,
+ C_HA_MESSAGE,
+ C_CRMD_STATUS_CALLBACK,
+ C_LRM_OP_CALLBACK,
+ C_TIMER_POPPED,
+ C_SHUTDOWN,
+ C_FSA_INTERNAL,
+};
+
+enum fsa_data_type {
+ fsa_dt_none,
+ fsa_dt_ha_msg,
+ fsa_dt_xml,
+ fsa_dt_lrm,
+};
+
+typedef struct fsa_data_s fsa_data_t;
+struct fsa_data_s {
+ int id;
+ enum crmd_fsa_input fsa_input;
+ enum crmd_fsa_cause fsa_cause;
+ uint64_t actions;
+ const char *origin;
+ void *data;
+ enum fsa_data_type data_type;
+};
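+
+/* Illustrative sketch: an FSA action handler retrieves its typed payload from
+ * the current fsa_data_t via the fsa_typed_data() helper (declared elsewhere),
+ * for example:
+ *
+ *     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ */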
+
+#define controld_set_fsa_input_flags(flags_to_set) do { \
+ controld_globals.fsa_input_register \
+ = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "FSA input", "controller", \
+ controld_globals.fsa_input_register, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_fsa_input_flags(flags_to_clear) do { \
+ controld_globals.fsa_input_register \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "FSA input", "controller", \
+ controld_globals.fsa_input_register, \
+ (flags_to_clear), \
+ #flags_to_clear); \
+ } while (0)
+
+#define controld_set_fsa_action_flags(flags_to_set) do { \
+ controld_globals.fsa_actions \
+ = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \
+ "FSA action", "controller", \
+ controld_globals.fsa_actions, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_fsa_action_flags(flags_to_clear) do { \
+ controld_globals.fsa_actions \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \
+ "FSA action", "controller", \
+ controld_globals.fsa_actions, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
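+
+/* Example usage (illustrative), mirroring how the controller sources use
+ * these macros: mark the daemon as not to be respawned, stop the DC timer,
+ * and wake the FSA:
+ *
+ *     controld_set_fsa_input_flags(R_STAYDOWN);
+ *     controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ *     controld_trigger_fsa();
+ */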
+
+// This should be moved elsewhere
+xmlNode *controld_query_executor_state(void);
+
+const char *fsa_input2string(enum crmd_fsa_input input);
+const char *fsa_state2string(enum crmd_fsa_state state);
+const char *fsa_cause2string(enum crmd_fsa_cause cause);
+const char *fsa_action2string(long long action);
+
+enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause);
+
+enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input);
+
+uint64_t controld_fsa_get_action(enum crmd_fsa_input input);
+
+void controld_init_fsa_trigger(void);
+void controld_destroy_fsa_trigger(void);
+
+void free_max_generation(void);
+
+# define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)
+# define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__)
+
+void controld_trigger_fsa_as(const char *fn, int line);
+
+/* A_READCONFIG */
+void do_read_config(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_PE_INVOKE */
+void do_pe_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_LOG */
+void do_log(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STARTUP */
+void do_startup(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_CIB_START, STOP, RESTART */
+void do_cib_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_HA_CONNECT */
+void do_ha_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_LRM_CONNECT */
+void do_lrm_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_PE_START, STOP, RESTART */
+void do_pe_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_TE_START, STOP, RESTART */
+void do_te_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STARTED */
+void do_started(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_MSG_ROUTE */
+void do_msg_route(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_RECOVER */
+void do_recover(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_ELECTION_VOTE */
+void do_election_vote(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_ELECTION_COUNT */
+void do_election_count_vote(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input,
+ fsa_data_t *msg_data);
+
+/* A_ELECTION_CHECK */
+void do_election_check(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_TIMER_STOP */
+void do_timer_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_TAKEOVER */
+void do_dc_takeover(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_RELEASE */
+void do_dc_release(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_OFFER_ALL */
+void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_OFFER_ONE */
+void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_ACK */
+void do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_REQ */
+void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input,
+ fsa_data_t *msg_data);
+
+/* A_DC_JOIN_FINALIZE */
+void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_QUERY */
+/* is there a DC out there? */
+void do_cl_join_query(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_ANNOUNCE */
+void do_cl_join_announce(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_REQUEST */
+void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input,
+ fsa_data_t *msg_data);
+
+/* A_CL_JOIN_RESULT */
+void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input,
+ fsa_data_t *msg_data);
+
+/* A_LRM_INVOKE */
+void do_lrm_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_LRM_EVENT */
+void do_lrm_event(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_TE_INVOKE, A_TE_CANCEL */
+void do_te_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_SHUTDOWN_REQ */
+void do_shutdown_req(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_SHUTDOWN */
+void do_shutdown(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STOP */
+void do_stop(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_EXIT_0, A_EXIT_1 */
+void do_exit(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_FINAL */
+void do_dc_join_final(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+#endif
diff --git a/daemons/controld/controld_globals.h b/daemons/controld/controld_globals.h
new file mode 100644
index 0000000..eff1607
--- /dev/null
+++ b/daemons/controld/controld_globals.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2022-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_GLOBALS__H
+# define CONTROLD_GLOBALS__H
+
+#include <crm_internal.h> // pcmk__output_t, etc.
+
+#include <stdint.h> // uint32_t, uint64_t
+#include <glib.h> // GList, GMainLoop
+#include <crm/cib.h> // cib_t
+#include <pacemaker-internal.h> // pcmk__graph_t
+#include <controld_fsa.h> // enum crmd_fsa_state
+
+typedef struct {
+ // Booleans
+
+ //! Group of \p controld_flags values
+ uint32_t flags;
+
+
+ // Controller FSA
+
+ //! FSA state
+ enum crmd_fsa_state fsa_state;
+
+ //! FSA actions (group of \p A_* flags)
+ uint64_t fsa_actions;
+
+ //! FSA input register contents (group of \p R_* flags)
+ uint64_t fsa_input_register;
+
+ //! FSA message queue
+ GList *fsa_message_queue;
+
+
+ // CIB
+
+ //! Connection to the CIB
+ cib_t *cib_conn;
+
+ //! CIB connection's client ID
+ const char *cib_client_id;
+
+
+ // Scheduler
+
+ //! Reference to the scheduler request being waited on
+ char *fsa_pe_ref;
+
+
+ // Transitioner
+
+ //! Transitioner UUID
+ char *te_uuid;
+
+ //! Graph of transition currently being processed
+ pcmk__graph_t *transition_graph;
+
+
+ // Logging
+
+ //! Output object for controller log messages
+ pcmk__output_t *logger_out;
+
+
+ // Other
+
+ //! Cluster name
+ char *cluster_name;
+
+ //! Designated controller name
+ char *dc_name;
+
+ //! Designated controller's Pacemaker version
+ char *dc_version;
+
+ //! Local node's node name
+ char *our_nodename;
+
+ //! Local node's UUID
+ char *our_uuid;
+
+ //! Last saved cluster communication layer membership ID
+ unsigned long long membership_id;
+
+ //! Max lifetime (in seconds) of a resource's shutdown lock to a node
+ guint shutdown_lock_limit;
+
+ //! Main event loop
+ GMainLoop *mainloop;
+} controld_globals_t;
+
+extern controld_globals_t controld_globals;
+
+/*!
+ * \internal
+ * \enum controld_flags
+ * \brief Bit flags to store various controller state and configuration info
+ */
+enum controld_flags {
+ //! The DC left in a membership change that is being processed
+ controld_dc_left = (1 << 0),
+
+ //! The FSA is stalled waiting for further input
+ controld_fsa_is_stalled = (1 << 1),
+
+ //! The local node has been in a quorate partition at some point
+ controld_ever_had_quorum = (1 << 2),
+
+ //! The local node is currently in a quorate partition
+ controld_has_quorum = (1 << 3),
+
+ //! Panic the local node if it loses quorum
+ controld_no_quorum_suicide = (1 << 4),
+
+ //! Lock resources to the local node when it shuts down cleanly
+ controld_shutdown_lock_enabled = (1 << 5),
+};
+
+# define controld_set_global_flags(flags_to_set) do { \
+ controld_globals.flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, \
+ "Global", "controller", \
+ controld_globals.flags, \
+ (flags_to_set), \
+ #flags_to_set); \
+ } while (0)
+
+# define controld_clear_global_flags(flags_to_clear) do { \
+ controld_globals.flags \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Global", \
+ "controller", controld_globals.flags, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
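+
+/* Example usage (illustrative): set a flag when the corresponding option is
+ * configured, and test it later with pcmk_is_set():
+ *
+ *     controld_set_global_flags(controld_shutdown_lock_enabled);
+ *     if (pcmk_is_set(controld_globals.flags,
+ *                     controld_shutdown_lock_enabled)) {
+ *         // shutdown locks are in effect
+ *     }
+ */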
+
+#endif // ifndef CONTROLD_GLOBALS__H
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
new file mode 100644
index 0000000..da6a9d6
--- /dev/null
+++ b/daemons/controld/controld_join_client.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
+
+extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
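+
+/* Client-side join sequence, as implemented below (summary):
+ *   A_CL_JOIN_QUERY    - ask whether a DC exists (CRM_OP_JOIN_ANNOUNCE)
+ *   A_CL_JOIN_ANNOUNCE - announce availability once local startup is done
+ *   A_CL_JOIN_REQUEST  - accept the DC's offer, sending our CIB generation
+ *   A_CL_JOIN_RESULT   - process the DC's ack/nack and send our history
+ */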
+
+/*!
+ * \internal
+ * \brief Remember if DC is shutting down as we join
+ *
+ * If we're joining while the current DC is shutting down, update its expected
+ * state, so we don't fence it if we become the new DC. (We weren't a peer
+ * when it broadcast its shutdown request.)
+ *
+ * \param[in] msg A join message from the DC
+ */
+static void
+update_dc_expected(const xmlNode *msg)
+{
+ if ((controld_globals.dc_name != NULL)
+ && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) {
+ crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name);
+
+ pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN);
+ }
+}
+
+/* A_CL_JOIN_QUERY */
+/* is there a DC out there? */
+void
+do_cl_join_query(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+
+ sleep(1); // Give the cluster layer time to propagate to the DC
+ update_dc(NULL); /* Unset any existing value so that the result is not discarded */
+ crm_debug("Querying for a DC");
+ send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
+ free_xml(req);
+}
+
+/* A_CL_JOIN_ANNOUNCE */
+
+/* This is a workaround for the fact that we may not be present, or may
+ * otherwise be unable to reply, when the DC sends out A_DC_JOIN_OFFER_ALL
+ */
+void
+do_cl_join_announce(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ /* Only announce ourselves while in the S_PENDING state */
+ if (cur_state != S_PENDING) {
+ crm_warn("Not announcing cluster join because in state %s",
+ fsa_state2string(cur_state));
+ return;
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
+ /* send as a broadcast */
+ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+
+ crm_debug("Announcing availability");
+ update_dc(NULL);
+ send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
+ free_xml(req);
+
+ } else {
+ /* Delay announce until we have finished local startup */
+ crm_warn("Delaying announce of cluster join until local startup is complete");
+ return;
+ }
+}
+
+static int query_call_id = 0;
+
+/* A_CL_JOIN_REQUEST */
+/* aka. accept the welcome offer */
+void
+do_cl_join_offer_respond(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *welcome_from;
+ const char *join_id;
+
+ CRM_CHECK(input != NULL, return);
+
+#if 0
+ if (we are sick) {
+ log error;
+
+ /* save the request for later? */
+ return;
+ }
+#endif
+
+ welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
+ join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
+ crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
+ welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));
+
+ /* we only ever want the last one */
+ if (query_call_id > 0) {
+ crm_trace("Cancelling previous join query: %d", query_call_id);
+ remove_cib_op_callback(query_call_id, FALSE);
+ query_call_id = 0;
+ }
+
+ if (update_dc(input->msg) == FALSE) {
+ crm_warn("Discarding cluster join offer from node %s (expected %s)",
+ welcome_from, controld_globals.dc_name);
+ return;
+ }
+
+ update_dc_expected(input->msg);
+
+ query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL,
+ cib_scope_local|cib_no_children);
+ fsa_register_cib_callback(query_call_id, strdup(join_id),
+ join_query_callback);
+ crm_trace("Registered join query callback: %d", query_call_id);
+
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ controld_trigger_fsa();
+}
+
+void
+join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ char *join_id = user_data;
+ xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
+
+ CRM_LOG_ASSERT(join_id != NULL);
+
+ if (query_call_id != call_id) {
+ crm_trace("Query %d superseded", call_id);
+ goto done;
+ }
+
+ query_call_id = 0;
+ if(rc != pcmk_ok || output == NULL) {
+ crm_err("Could not retrieve version details for join-%s: %s (%d)",
+ join_id, pcmk_strerror(rc), rc);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+
+ } else if (controld_globals.dc_name == NULL) {
+ crm_debug("Membership is in flux, not continuing join-%s", join_id);
+
+ } else {
+ xmlNode *reply = NULL;
+
+ crm_debug("Respond to join offer join-%s from %s",
+ join_id, controld_globals.dc_name);
+ copy_in_properties(generation, output);
+
+ reply = create_request(CRM_OP_JOIN_REQUEST, generation,
+ controld_globals.dc_name, CRM_SYSTEM_DC,
+ CRM_SYSTEM_CRMD, NULL);
+
+ crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
+ crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ send_cluster_message(crm_get_peer(0, controld_globals.dc_name),
+ crm_msg_crmd, reply, TRUE);
+ free_xml(reply);
+ }
+
+ done:
+ free_xml(generation);
+}
+
+static void
+set_join_state(const char * start_state)
+{
+ if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) {
+ crm_notice("Forcing node %s to join in %s state per configured "
+ "environment", controld_globals.our_nodename, start_state);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_sync_call,
+ XML_CIB_TAG_NODES, controld_globals.our_uuid,
+ NULL, NULL, NULL, "standby", "on", NULL, NULL);
+
+ } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) {
+ crm_notice("Forcing node %s to join in %s state per configured "
+ "environment", controld_globals.our_nodename, start_state);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_sync_call,
+ XML_CIB_TAG_NODES, controld_globals.our_uuid,
+ NULL, NULL, NULL, "standby", "off", NULL, NULL);
+
+ } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) {
+ crm_debug("Not forcing a starting state on node %s",
+ controld_globals.our_nodename);
+
+ } else {
+ crm_warn("Unrecognized start state '%s', using 'default' (%s)",
+ start_state, controld_globals.our_nodename);
+ }
+}
+
+static int
+update_conn_host_cache(xmlNode *node, void *userdata)
+{
+ const char *remote = crm_element_value(node, XML_ATTR_ID);
+ const char *conn_host = crm_element_value(node, PCMK__XA_CONN_HOST);
+ const char *state = crm_element_value(node, XML_CIB_TAG_STATE);
+
+ crm_node_t *remote_peer = crm_remote_peer_get(remote);
+
+ if (remote_peer == NULL) {
+ return pcmk_rc_ok;
+ }
+
+ if (conn_host != NULL) {
+ pcmk__str_update(&remote_peer->conn_host, conn_host);
+ }
+
+ if (state != NULL) {
+ pcmk__update_peer_state(__func__, remote_peer, state, 0);
+ }
+
+ return pcmk_rc_ok;
+}
+
+/* A_CL_JOIN_RESULT */
+/* aka. this is notification that we have (or have not) been accepted */
+void
+do_cl_join_finalize_respond(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *tmp1 = NULL;
+ gboolean was_nack = TRUE;
+ static gboolean first_join = TRUE;
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
+
+ int join_id = -1;
+ const char *op = crm_element_value(input->msg, F_CRM_TASK);
+ const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
+
+ if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) {
+ crm_trace("Ignoring op=%s message", op);
+ return;
+ }
+
+ /* Determine whether it was an ack or a nack */
+ if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) {
+ was_nack = FALSE;
+ }
+
+ crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
+
+ if (was_nack) {
+ crm_err("Shutting down because cluster join with leader %s failed "
+ CRM_XS" join-%d NACK'd", welcome_from, join_id);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ return;
+ }
+
+ if (!AM_I_DC
+ && pcmk__str_eq(welcome_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ crm_warn("Discarding our own welcome - we're no longer the DC");
+ return;
+ }
+
+ if (update_dc(input->msg) == FALSE) {
+ crm_warn("Discarding %s from node %s (expected from %s)",
+ op, welcome_from, controld_globals.dc_name);
+ return;
+ }
+
+ update_dc_expected(input->msg);
+
+ /* record the node's feature set as a transient attribute */
+ update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET,
+ CRM_FEATURE_SET, NULL, FALSE);
+
+ /* send our status section to the DC */
+ tmp1 = controld_query_executor_state();
+ if (tmp1 != NULL) {
+ xmlNode *remotes = NULL;
+ xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1,
+ controld_globals.dc_name, CRM_SYSTEM_DC,
+ CRM_SYSTEM_CRMD, NULL);
+
+ crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
+
+ crm_debug("Confirming join-%d: sending local operation history to %s",
+ join_id, controld_globals.dc_name);
+
+ /*
+ * If this is the node's first join since the controller started on it,
+ * set its initial state (standby or member) according to the user's
+ * preference.
+ *
+ * We do not clear the LRM history here. Even if the DC failed to do it
+ * when we last left, removing them here creates a race condition if the
+ * controller is being recovered. Instead of a list of active resources
+ * from the executor, we may end up with a blank status section. If we
+ * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
+ * clones and end up with multiple active instances on the machine.
+ */
+ if (first_join
+ && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+
+ first_join = FALSE;
+ if (start_state) {
+ set_join_state(start_state);
+ }
+ }
+
+ send_cluster_message(crm_get_peer(0, controld_globals.dc_name),
+ crm_msg_crmd, reply, TRUE);
+ free_xml(reply);
+
+ if (AM_I_DC == FALSE) {
+ register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE,
+ __func__);
+ }
+
+ free_xml(tmp1);
+
+ /* Update the remote node cache with information about which node
+ * is hosting the connection.
+ */
+ remotes = pcmk__xe_match(input->msg, XML_CIB_TAG_NODES, NULL, NULL);
+ if (remotes != NULL) {
+ pcmk__xe_foreach_child(remotes, XML_CIB_TAG_NODE, update_conn_host_cache, NULL);
+ }
+
+ } else {
+ crm_err("Could not confirm join-%d with %s: Local operation history "
+ "failed", join_id, controld_globals.dc_name);
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+}
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
new file mode 100644
index 0000000..f82b132
--- /dev/null
+++ b/daemons/controld/controld_join_dc.c
@@ -0,0 +1,987 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+static char *max_generation_from = NULL;
+static xmlNodePtr max_generation_xml = NULL;
+
+/*!
+ * \internal
+ * \brief Nodes from which a CIB sync has failed since the peer joined
+ *
+ * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
+ * the name of a client node from which a CIB \p sync_from() call has failed in
+ * \p do_dc_join_finalize() since the client joined the cluster as a peer.
+ * \p join_id is the ID of the join round in which the \p sync_from() failed,
+ * and is intended for use in nack log messages.
+ */
+static GHashTable *failed_sync_nodes = NULL;
+
+void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
+void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
+gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+/* Numeric counter used to identify join rounds (an unsigned int would be
+ * appropriate, except we get and set it in XML as int)
+ */
+static int current_join_id = 0;
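+
+/* DC-side view of a peer's join progress (summary of the crm_join_phase
+ * values as used below):
+ *   crm_join_welcomed   - offer sent, awaiting the node's join request
+ *   crm_join_integrated - request accepted, awaiting finalization
+ *   crm_join_finalized  - ack sent after the CIB sync, awaiting confirmation
+ *   crm_join_confirmed  - node history recorded; the join is complete
+ */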
+
+/*!
+ * \internal
+ * \brief Destroy the hash table containing failed sync nodes
+ */
+void
+controld_destroy_failed_sync_table(void)
+{
+ if (failed_sync_nodes != NULL) {
+ g_hash_table_destroy(failed_sync_nodes);
+ failed_sync_nodes = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove a node from the failed sync nodes table if present
+ *
+ * \param[in] node_name Node name to remove
+ */
+void
+controld_remove_failed_sync_node(const char *node_name)
+{
+ if (failed_sync_nodes != NULL) {
+ g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add to a hash table a node whose CIB failed to sync
+ *
+ * \param[in] node_name Name of node whose CIB failed to sync
+ * \param[in] join_id Join round when the failure occurred
+ */
+static void
+record_failed_sync_node(const char *node_name, gint join_id)
+{
+ if (failed_sync_nodes == NULL) {
+ failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
+ }
+
+ /* If the node is already in the table then we failed to nack it during the
+ * filter offer step
+ */
+ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
+ GINT_TO_POINTER(join_id)));
+}
+
+/*!
+ * \internal
+ * \brief Look up a node name in the failed sync table
+ *
+ * \param[in] node_name Name of node to look up
+ * \param[out] join_id Where to store the join ID of when the sync failed
+ *
+ * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
+ * node name was found, or \p pcmk_rc_node_unknown otherwise.
+ * \note \p *join_id is set to -1 if the node is not found.
+ */
+static int
+lookup_failed_sync_node(const char *node_name, gint *join_id)
+{
+ *join_id = -1;
+
+ if (failed_sync_nodes != NULL) {
+ gpointer result = g_hash_table_lookup(failed_sync_nodes,
+ (gchar *) node_name);
+ if (result != NULL) {
+ *join_id = GPOINTER_TO_INT(result);
+ return pcmk_rc_ok;
+ }
+ }
+ return pcmk_rc_node_unknown;
+}
+
+void
+crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
+{
+ enum crm_join_phase last = 0;
+
+ CRM_CHECK(node != NULL, return);
+
+ /* Remote nodes do not participate in joins */
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ return;
+ }
+
+ last = node->join;
+
+ if(phase == last) {
+ crm_trace("Node %s join-%d phase is still %s "
+ CRM_XS " nodeid=%u source=%s",
+ node->uname, current_join_id, crm_join_phase_str(last),
+ node->id, source);
+
+ } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
+ node->join = phase;
+ crm_trace("Node %s join-%d phase is now %s (was %s) "
+ CRM_XS " nodeid=%u source=%s",
+ node->uname, current_join_id, crm_join_phase_str(phase),
+ crm_join_phase_str(last), node->id, source);
+
+ } else {
+ crm_warn("Rejecting join-%d phase update for node %s because "
+ "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
+ current_join_id, node->uname, crm_join_phase_str(last),
+ crm_join_phase_str(phase), node->id, source);
+ }
+}
+
+static void
+start_join_round(void)
+{
+ GHashTableIter iter;
+ crm_node_t *peer = NULL;
+
+ crm_debug("Starting new join round join-%d", current_join_id);
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ crm_update_peer_join(__func__, peer, crm_join_none);
+ }
+ if (max_generation_from != NULL) {
+ free(max_generation_from);
+ max_generation_from = NULL;
+ }
+ if (max_generation_xml != NULL) {
+ free_xml(max_generation_xml);
+ max_generation_xml = NULL;
+ }
+ controld_clear_fsa_input_flags(R_HAVE_CIB);
+ controld_forget_all_cib_replace_calls();
+}
+
+/*!
+ * \internal
+ * \brief Create a join message from the DC
+ *
+ * \param[in] join_op Join operation name
+ * \param[in] host_to Recipient of message
+ */
+static xmlNode *
+create_dc_message(const char *join_op, const char *host_to)
+{
+ xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
+ CRM_SYSTEM_DC, NULL);
+
+ /* Identify which election this is a part of */
+ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
+
+ /* Add a field specifying whether the DC is shutting down. This keeps the
+ * joining node from fencing the old DC if it becomes the new DC.
+ */
+ pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING,
+ pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN));
+ return msg;
+}
+
+static void
+join_make_offer(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *offer = NULL;
+ crm_node_t *member = (crm_node_t *)value;
+
+ CRM_ASSERT(member != NULL);
+ if (crm_is_peer_active(member) == FALSE) {
+ crm_info("Not making join-%d offer to inactive node %s",
+ current_join_id,
+ (member->uname? member->uname : "with unknown name"));
+ if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
+ /* You would think this unsafe, but in fact this plus an
+ * active resource is what causes it to be fenced.
+ *
+ * Yes, this does mean that any node that dies at the same
+ * time as the old DC and is not (still) running resources
+ * won't be fenced.
+ *
+ * I'm not happy about this either.
+ */
+ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
+ }
+ return;
+ }
+
+ if (member->uname == NULL) {
+ crm_info("Not making join-%d offer to node uuid %s with unknown name",
+ current_join_id, member->uuid);
+ return;
+ }
+
+ if (controld_globals.membership_id != crm_peer_seq) {
+ controld_globals.membership_id = crm_peer_seq;
+ crm_info("Making join-%d offers based on membership event %llu",
+ current_join_id, crm_peer_seq);
+ }
+
+ if(user_data && member->join > crm_join_none) {
+ crm_info("Not making join-%d offer to already known node %s (%s)",
+ current_join_id, member->uname,
+ crm_join_phase_str(member->join));
+ return;
+ }
+
+ crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
+
+ offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
+
+ // Advertise our feature set so the joining node can bail if not compatible
+ crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+
+ crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
+ send_cluster_message(member, crm_msg_crmd, offer, TRUE);
+ free_xml(offer);
+
+ crm_update_peer_join(__func__, member, crm_join_welcomed);
+}
+
+/* A_DC_JOIN_OFFER_ALL */
+void
+do_dc_join_offer_all(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ int count;
+
+ /* Reset everyone's status back to down or in_ccm in the CIB.
+ * Any nodes that are active in the CIB but not in the cluster membership
+ * will be seen as offline by the scheduler anyway.
+ */
+ current_join_id++;
+ start_join_round();
+
+ update_dc(NULL);
+ if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
+ crm_info("A new node joined the cluster");
+ }
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
+
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
+ current_join_id, count, pcmk__plural_s(count));
+
+ // Don't waste time by invoking the scheduler yet
+}
+
+/* A_DC_JOIN_OFFER_ONE */
+void
+do_dc_join_offer_one(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_node_t *member;
+ ha_msg_input_t *welcome = NULL;
+ int count;
+ const char *join_to = NULL;
+
+ if (msg_data->data == NULL) {
+ crm_info("Making join-%d offers to any unconfirmed nodes "
+ "because an unknown node joined", current_join_id);
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
+ check_join_state(cur_state, __func__);
+ return;
+ }
+
+ welcome = fsa_typed_data(fsa_dt_ha_msg);
+ if (welcome == NULL) {
+ // fsa_typed_data() already logged an error
+ return;
+ }
+
+ join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
+ if (join_to == NULL) {
+ crm_err("Can't make join-%d offer to unknown node", current_join_id);
+ return;
+ }
+ member = crm_get_peer(0, join_to);
+
+ /* It is possible that a node will have been sick or starting up when the
+ * original offer was made. However, it will either re-announce itself in
+ * due course, or we can re-send the original offer to the client.
+ */
+
+ crm_update_peer_join(__func__, member, crm_join_none);
+ join_make_offer(NULL, member, NULL);
+
+ /* If the offer isn't to the local node, make an offer to the local node as
+ * well, to ensure the correct value for max_generation_from.
+ */
+ if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
+ member = crm_get_peer(0, controld_globals.our_nodename);
+ join_make_offer(NULL, member, NULL);
+ }
+
+ /* This was a genuine join request; cancel any existing transition and
+ * invoke the scheduler.
+ */
+ abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
+
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
+ current_join_id, count, pcmk__plural_s(count));
+
+ // Don't waste time by invoking the scheduler yet
+}
+
+static int
+compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
+{
+ const char *elem_l = crm_element_value(left, field);
+ const char *elem_r = crm_element_value(right, field);
+
+ long long int_elem_l;
+ long long int_elem_r;
+
+ pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
+ pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
+
+ if (int_elem_l < int_elem_r) {
+ return -1;
+
+ } else if (int_elem_l > int_elem_r) {
+ return 1;
+ }
+
+ return 0;
+}
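+
+/* Illustrative example: generations are ordered by admin_epoch, then epoch,
+ * then num_updates. With a current best of (admin_epoch=1, epoch=42,
+ * num_updates=7), a joining node reporting (1, 43, 0) compares greater on
+ * the second field and would become the new best generation below.
+ */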
+
+/* A_DC_JOIN_PROCESS_REQ */
+void
+do_dc_join_filter_offer(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *generation = NULL;
+
+ int cmp = 0;
+ int join_id = -1;
+ int count = 0;
+ gint value = 0;
+ gboolean ack_nack_bool = TRUE;
+ ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+
+ const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+ const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
+ const char *join_version = crm_element_value(join_ack->msg,
+ XML_ATTR_CRM_VERSION);
+ crm_node_t *join_node = NULL;
+
+ if (join_from == NULL) {
+ crm_err("Ignoring invalid join request without node name");
+ return;
+ }
+ join_node = crm_get_peer(0, join_from);
+
+ crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
+ if (join_id != current_join_id) {
+ crm_debug("Ignoring join-%d request from %s because we are on join-%d",
+ join_id, join_from, current_join_id);
+ check_join_state(cur_state, __func__);
+ return;
+ }
+
+ generation = join_ack->xml;
+ if (max_generation_xml != NULL && generation != NULL) {
+ int lpc = 0;
+
+ const char *attributes[] = {
+ XML_ATTR_GENERATION_ADMIN,
+ XML_ATTR_GENERATION,
+ XML_ATTR_NUMUPDATES,
+ };
+
+ for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
+ cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
+ }
+ }
+
+ if (ref == NULL) {
+ ref = "none"; // for logging only
+ }
+
+ if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
+ crm_err("Rejecting join-%d request from node %s because we failed to "
+ "sync its CIB in join-%d " CRM_XS " ref=%s",
+ join_id, join_from, value, ref);
+ ack_nack_bool = FALSE;
+
+ } else if (!crm_is_peer_active(join_node)) {
+ if (match_down_event(join_from) != NULL) {
+ /* The join request was received after the node was fenced or
+ * otherwise shutdown in a way that we're aware of. No need to log
+ * an error in this rare occurrence; we know the client was recently
+ * shut down, and receiving a lingering in-flight request is not
+ * cause for alarm.
+ */
+ crm_debug("Rejecting join-%d request from inactive node %s "
+ CRM_XS " ref=%s", join_id, join_from, ref);
+ } else {
+ crm_err("Rejecting join-%d request from inactive node %s "
+ CRM_XS " ref=%s", join_id, join_from, ref);
+ }
+ ack_nack_bool = FALSE;
+
+ } else if (generation == NULL) {
+ crm_err("Rejecting invalid join-%d request from node %s "
+ "missing CIB generation " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ ack_nack_bool = FALSE;
+
+ } else if ((join_version == NULL)
+ || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
+ crm_err("Rejecting join-%d request from node %s because feature set %s"
+ " is incompatible with ours (%s) " CRM_XS " ref=%s",
+ join_id, join_from, (join_version? join_version : "pre-3.1.0"),
+ CRM_FEATURE_SET, ref);
+ ack_nack_bool = FALSE;
+
+ } else if (max_generation_xml == NULL) {
+ const char *validation = crm_element_value(generation,
+ XML_ATTR_VALIDATION);
+
+ if (get_schema_version(validation) < 0) {
+ crm_err("Rejecting join-%d request from %s (with first CIB "
+ "generation) due to unknown schema version %s "
+ CRM_XS " ref=%s",
+ join_id, join_from, validation, ref);
+ ack_nack_bool = FALSE;
+
+ } else {
+ crm_debug("Accepting join-%d request from %s (with first CIB "
+ "generation) " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ max_generation_xml = copy_xml(generation);
+ pcmk__str_update(&max_generation_from, join_from);
+ }
+
+ } else if ((cmp < 0)
+ || ((cmp == 0)
+ && pcmk__str_eq(join_from, controld_globals.our_nodename,
+ pcmk__str_casei))) {
+ const char *validation = crm_element_value(generation,
+ XML_ATTR_VALIDATION);
+
+ if (get_schema_version(validation) < 0) {
+ crm_err("Rejecting join-%d request from %s (with better CIB "
+ "generation than current best from %s) due to unknown "
+ "schema version %s " CRM_XS " ref=%s",
+ join_id, join_from, max_generation_from, validation, ref);
+ ack_nack_bool = FALSE;
+
+ } else {
+ crm_debug("Accepting join-%d request from %s (with better CIB "
+ "generation than current best from %s) " CRM_XS " ref=%s",
+ join_id, join_from, max_generation_from, ref);
+ crm_log_xml_debug(max_generation_xml, "Old max generation");
+ crm_log_xml_debug(generation, "New max generation");
+
+ free_xml(max_generation_xml);
+ max_generation_xml = copy_xml(join_ack->xml);
+ pcmk__str_update(&max_generation_from, join_from);
+ }
+
+ } else {
+ crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ }
+
+ if (!ack_nack_bool) {
+ if (compare_version(join_version, "3.17.0") < 0) {
+ /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
+ * after a nack message, so don't send one
+ */
+ crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
+ } else {
+ crm_update_peer_join(__func__, join_node, crm_join_nack);
+ }
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
+
+ } else {
+ crm_update_peer_join(__func__, join_node, crm_join_integrated);
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+ }
+
+ count = crmd_join_phase_count(crm_join_integrated);
+ crm_debug("%d node%s currently integrated in join-%d",
+ count, pcmk__plural_s(count), join_id);
+
+ if (check_join_state(cur_state, __func__) == FALSE) {
+ // Don't waste time by invoking the scheduler yet
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s",
+ join_id, count, pcmk__plural_s(count));
+ }
+}
+
+/* A_DC_JOIN_FINALIZE */
+void
+do_dc_join_finalize(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ char *sync_from = NULL;
+ int rc = pcmk_ok;
+ int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
+ int count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ + crmd_join_phase_count(crm_join_nack)
+ + crmd_join_phase_count(crm_join_nack_quiet);
+
+ /* We can do this straight away, to avoid clients timing us out
+ * while we compute the latest CIB
+ */
+ if (count_welcomed != 0) {
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s "
+ "before finalizing join", current_join_id, count_welcomed,
+ pcmk__plural_s(count_welcomed));
+ crmd_join_phase_log(LOG_DEBUG);
+ /* crmd_fsa_stall(FALSE); Needed? */
+ return;
+
+ } else if (count_finalizable == 0) {
+ crm_debug("Finalization not needed for join-%d at the current time",
+ current_join_id);
+ crmd_join_phase_log(LOG_DEBUG);
+ check_join_state(controld_globals.fsa_state, __func__);
+ return;
+ }
+
+ controld_clear_fsa_input_flags(R_HAVE_CIB);
+ if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ controld_set_fsa_input_flags(R_HAVE_CIB);
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ crm_warn("Delaying join-%d finalization while transition in progress",
+ current_join_id);
+ crmd_join_phase_log(LOG_DEBUG);
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ // Send our CIB out to everyone
+ pcmk__str_update(&sync_from, controld_globals.our_nodename);
+ crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
+ current_join_id, count_finalizable,
+ pcmk__plural_s(count_finalizable));
+ crm_log_xml_debug(max_generation_xml, "Requested CIB version");
+
+ } else {
+ // Ask for the agreed best CIB
+ pcmk__str_update(&sync_from, max_generation_from);
+ crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
+ current_join_id, count_finalizable,
+ pcmk__plural_s(count_finalizable), sync_from);
+ crm_log_xml_notice(max_generation_xml, "Requested CIB version");
+ }
+ crmd_join_phase_log(LOG_DEBUG);
+
+ rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
+ sync_from, NULL, cib_none);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ controld_record_cib_replace_call(rc);
+ }
+ fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
+}
+
+void
+free_max_generation(void)
+{
+ free(max_generation_from);
+ max_generation_from = NULL;
+
+ free_xml(max_generation_xml);
+ max_generation_xml = NULL;
+}
+
+void
+finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ CRM_LOG_ASSERT(-EPERM != rc);
+
+ controld_forget_cib_replace_call(call_id);
+
+ if (rc != pcmk_ok) {
+ const char *sync_from = (const char *) user_data;
+
+ do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
+ "Could not sync CIB from %s in join-%d: %s",
+ sync_from, current_join_id, pcmk_strerror(rc));
+
+ if (rc != -pcmk_err_old_data) {
+ record_failed_sync_node(sync_from, current_join_id);
+ }
+
+ /* restart the whole join process */
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
+ __func__);
+
+ } else if (!AM_I_DC) {
+ crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
+
+ } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
+ crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
+ "(%s)", current_join_id,
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else {
+ controld_set_fsa_input_flags(R_HAVE_CIB);
+
+ /* make sure dc_uuid is re-set to us */
+ if (!check_join_state(controld_globals.fsa_state, __func__)) {
+ int count_finalizable = 0;
+
+ count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ + crmd_join_phase_count(crm_join_nack)
+ + crmd_join_phase_count(crm_join_nack_quiet);
+
+ crm_debug("Notifying %d node%s of join-%d results",
+ count_finalizable, pcmk__plural_s(count_finalizable),
+ current_join_id);
+ g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
+ }
+ }
+}
+
+static void
+join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_debug("join-%d node history update (via CIB call %d) complete",
+ current_join_id, call_id);
+ check_join_state(controld_globals.fsa_state, __func__);
+
+ } else {
+ crm_err("join-%d node history update (via CIB call %d) failed: %s "
+ "(next transition may determine resource status incorrectly)",
+ current_join_id, call_id, pcmk_strerror(rc));
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/* A_DC_JOIN_PROCESS_ACK */
+void
+do_dc_join_ack(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ int join_id = -1;
+ ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+ enum controld_section_e section = controld_section_lrm;
+ const int cib_opts = cib_scope_local|cib_can_create;
+
+ const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
+ const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+ crm_node_t *peer = NULL;
+
+ // Sanity checks
+ if (join_from == NULL) {
+ crm_warn("Ignoring message received without node identification");
+ return;
+ }
+ if (op == NULL) {
+ crm_warn("Ignoring message received from %s without task", join_from);
+ return;
+ }
+
+ if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
+ crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
+ op, join_from, CRM_OP_JOIN_CONFIRM);
+ return;
+ }
+
+ if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
+ crm_warn("Ignoring join confirmation from %s without valid join ID",
+ join_from);
+ return;
+ }
+
+ peer = crm_get_peer(0, join_from);
+ if (peer->join != crm_join_finalized) {
+ crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
+ "(currently %s not %s)",
+ join_id, join_from, crm_join_phase_str(peer->join),
+ crm_join_phase_str(crm_join_finalized));
+ return;
+ }
+
+ if (join_id != current_join_id) {
+ crm_err("Rejecting join-%d confirmation from %s "
+ "because currently on join-%d",
+ join_id, join_from, current_join_id);
+ crm_update_peer_join(__func__, peer, crm_join_nack);
+ return;
+ }
+
+ crm_update_peer_join(__func__, peer, crm_join_confirmed);
+
+ /* Update CIB with node's current executor state. A new transition will be
+ * triggered later, when the CIB notifies us of the change.
+ */
+ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ section = controld_section_lrm_unlocked;
+ }
+ controld_delete_node_state(join_from, section, cib_scope_local);
+ if (pcmk__str_eq(join_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ xmlNode *now_dc_lrmd_state = controld_query_executor_state();
+
+ if (now_dc_lrmd_state != NULL) {
+ crm_debug("Updating local node history for join-%d "
+ "from query result", join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts,
+ join_update_complete_callback);
+ free_xml(now_dc_lrmd_state);
+ } else {
+ crm_warn("Updating local node history from join-%d confirmation "
+ "because query failed", join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+ join_update_complete_callback);
+ }
+ } else {
+ crm_debug("Updating node history for %s from join-%d confirmation",
+ join_from, join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+ join_update_complete_callback);
+ }
+}
+
+void
+finalize_join_for(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *acknak = NULL;
+ xmlNode *tmp1 = NULL;
+ crm_node_t *join_node = value;
+ const char *join_to = join_node->uname;
+ bool integrated = false;
+
+ switch (join_node->join) {
+ case crm_join_integrated:
+ integrated = true;
+ break;
+ case crm_join_nack:
+ case crm_join_nack_quiet:
+ break;
+ default:
+ crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
+ "for join-%d", join_to,
+ crm_join_phase_str(join_node->join), current_join_id);
+ return;
+ }
+
+ /* Update the <node> element with the node's name and UUID, in case they
+ * weren't known before
+ */
+ crm_trace("Updating node name and UUID in CIB for %s", join_to);
+ tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
+ set_uuid(tmp1, XML_ATTR_ID, join_node);
+ crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
+ fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
+ free_xml(tmp1);
+
+ if (join_node->join == crm_join_nack_quiet) {
+ crm_trace("Not sending nack message to node %s with feature set older "
+ "than 3.17.0", join_to);
+ return;
+ }
+
+ join_node = crm_get_peer(0, join_to);
+ if (!crm_is_peer_active(join_node)) {
+ /*
+ * NACK'ing nodes that the membership layer doesn't know about yet
+ * simply creates more churn
+ *
+ * Better to leave them waiting and let the join restart when
+ * the new membership event comes in
+ *
+ * All other NACKs (due to versions etc) should still be processed
+ */
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
+ return;
+ }
+
+ // Acknowledge or nack node's join request
+ crm_debug("%sing join-%d request from %s",
+ integrated? "Acknowledg" : "Nack", current_join_id, join_to);
+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
+ pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
+
+ if (integrated) {
+        // Nacked nodes are left unchanged; only integrated nodes advance
+ crm_update_peer_join(__func__, join_node, crm_join_finalized);
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+
+        /* Iterate through the remote peer cache and add, to the ACK message,
+         * information about which cluster node hosts each remote node. This
+         * keeps a newly joining controller in sync with what has already
+         * happened.
+         */
+ if (crm_remote_peer_cache_size() != 0) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES);
+
+ g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *remote = NULL;
+
+ if (!node->conn_host) {
+ continue;
+ }
+
+ remote = create_xml_node(remotes, XML_CIB_TAG_NODE);
+ pcmk__xe_set_props(remote,
+ XML_ATTR_ID, node->uname,
+ XML_CIB_TAG_STATE, node->state,
+ PCMK__XA_CONN_HOST, node->conn_host,
+ NULL);
+ }
+ }
+ }
+ send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE);
+ free_xml(acknak);
+ return;
+}
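+
+/* As an illustrative sketch (names and values hypothetical), the
+ * acknowledgement built above for an integrated node might carry a
+ * remote-node section such as:
+ *
+ *   <nodes>
+ *     <node id="remote1" node_state="member" connection_host="cluster2"/>
+ *   </nodes>
+ *
+ * where the attribute names shown are assumed expansions of XML_ATTR_ID,
+ * XML_CIB_TAG_STATE, and PCMK__XA_CONN_HOST.
+ */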
+
+gboolean
+check_join_state(enum crmd_fsa_state cur_state, const char *source)
+{
+ static unsigned long long highest_seq = 0;
+
+ if (controld_globals.membership_id != crm_peer_seq) {
+ crm_debug("join-%d: Membership changed from %llu to %llu "
+ CRM_XS " highest=%llu state=%s for=%s",
+ current_join_id, controld_globals.membership_id, crm_peer_seq,
+ highest_seq, fsa_state2string(cur_state), source);
+ if(highest_seq < crm_peer_seq) {
+ /* Don't spam the FSA with duplicates */
+ highest_seq = crm_peer_seq;
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+ }
+
+ } else if (cur_state == S_INTEGRATION) {
+ if (crmd_join_phase_count(crm_join_welcomed) == 0) {
+ int count = crmd_join_phase_count(crm_join_integrated);
+
+ crm_debug("join-%d: Integration of %d peer%s complete "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
+ return TRUE;
+ }
+
+ } else if (cur_state == S_FINALIZE_JOIN) {
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ crm_debug("join-%d: Delaying finalization until we have CIB "
+ CRM_XS " state=%s for=%s",
+ current_join_id, fsa_state2string(cur_state), source);
+ return TRUE;
+
+ } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
+ int count = crmd_join_phase_count(crm_join_welcomed);
+
+ crm_debug("join-%d: Still waiting on %d welcomed node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
+ int count = crmd_join_phase_count(crm_join_integrated);
+
+ crm_debug("join-%d: Still waiting on %d integrated node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
+ int count = crmd_join_phase_count(crm_join_finalized);
+
+ crm_debug("join-%d: Still waiting on %d finalized node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else {
+ crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
+ current_join_id, fsa_state2string(cur_state), source);
+ register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
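+
+/* To summarize the flow above: peers drain through the join phases in order,
+ * welcomed -> integrated -> finalized -> confirmed. Integration completes
+ * once no peer is still merely welcomed, and finalization completes once no
+ * peer remains in the welcomed, integrated, or finalized phases.
+ */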
+
+void
+do_dc_join_final(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
+ crm_update_quorum(crm_have_quorum, TRUE);
+}
+
+int crmd_join_phase_count(enum crm_join_phase phase)
+{
+ int count = 0;
+ crm_node_t *peer;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ if(peer->join == phase) {
+ count++;
+ }
+ }
+ return count;
+}
+
+void crmd_join_phase_log(int level)
+{
+ crm_node_t *peer;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
+ crm_join_phase_str(peer->join));
+ }
+}
diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h
new file mode 100644
index 0000000..25f3db3
--- /dev/null
+++ b/daemons/controld/controld_lrm.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef CONTROLD_LRM__H
+# define CONTROLD_LRM__H
+
+#include <controld_messages.h>
+
+extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
+void lrm_clear_last_failure(const char *rsc_id, const char *node_name,
+ const char *operation, guint interval_ms);
+void lrm_op_callback(lrmd_event_data_t * op);
+lrmd_t *crmd_local_lrmd_conn(void);
+
+typedef struct resource_history_s {
+ char *id;
+ uint32_t last_callid;
+ lrmd_rsc_info_t rsc;
+ lrmd_event_data_t *last;
+ lrmd_event_data_t *failed;
+ GList *recurring_op_list;
+
+    /* Resources must be stopped using the same parameters with which they
+     * were started. This hash table holds the parameters to use for the
+     * next stop command on this resource. */
+ GHashTable *stop_params;
+} rsc_history_t;
+
+void history_free(gpointer data);
+
+enum active_op_e {
+ active_op_remove = (1 << 0),
+ active_op_cancelled = (1 << 1),
+};
+
+// In-flight action (recurring or pending)
+typedef struct active_op_s {
+ guint interval_ms;
+ int call_id;
+ uint32_t flags; // bitmask of active_op_e
+ time_t start_time;
+ time_t lock_time;
+ char *rsc_id;
+ char *op_type;
+ char *op_key;
+ char *user_data;
+ GHashTable *params;
+} active_op_t;
+
+#define controld_set_active_op_flags(active_op, flags_to_set) do { \
+ (active_op)->flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Active operation", (active_op)->op_key, \
+ (active_op)->flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_active_op_flags(active_op, flags_to_clear) do { \
+ (active_op)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Active operation", (active_op)->op_key, \
+ (active_op)->flags, (flags_to_clear), #flags_to_clear); \
+ } while (0)
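+
+/* A minimal usage sketch (hypothetical call site): flag an in-flight action
+ * for removal when it completes, then test the flag with pcmk_is_set():
+ *
+ *   controld_set_active_op_flags(pending, active_op_remove);
+ *   if (pcmk_is_set(pending->flags, active_op_remove)) {
+ *       // forget this action's history once its result is processed
+ *   }
+ */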
+
+typedef struct lrm_state_s {
+ const char *node_name;
+ void *conn; // Reserved for controld_execd_state.c usage
+ void *remote_ra_data; // Reserved for controld_remote_ra.c usage
+
+ GHashTable *resource_history;
+ GHashTable *active_ops; // Pending and recurring actions
+ GHashTable *deletion_ops;
+ GHashTable *rsc_info_cache;
+ GHashTable *metadata_cache; // key = class[:provider]:agent, value = ra_metadata_s
+
+ int num_lrm_register_fails;
+} lrm_state_t;
+
+struct pending_deletion_op_s {
+ char *rsc;
+ ha_msg_input_t *input;
+};
+
+/*!
+ * \brief Check whether this is the local IPC connection to the executor
+ */
+gboolean
+lrm_state_is_local(lrm_state_t *lrm_state);
+
+/*!
+ * \brief Clear all state information from a single state entry.
+ * \note It is sometimes useful to preserve the metadata cache when it won't go stale.
+ * \note This does not close the executor connection
+ */
+void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata);
+GList *lrm_state_get_list(void);
+
+/*!
+ * \brief Initialize the internal state tables
+ */
+gboolean lrm_state_init_local(void);
+
+/*!
+ * \brief Destroy all state entries and internal state tables
+ */
+void lrm_state_destroy_all(void);
+
+/*!
+ * \brief Destroy executor connection by node name
+ */
+void lrm_state_destroy(const char *node_name);
+
+/*!
+ * \brief Find lrm_state data by node name
+ */
+lrm_state_t *lrm_state_find(const char *node_name);
+
+/*!
+ * \brief Either find or create a new entry
+ */
+lrm_state_t *lrm_state_find_or_create(const char *node_name);
+
+/*!
+ * The functions below are wrappers for the executor API that the controller
+ * uses. These wrapper functions allow us to treat the controller's remote
+ * executor connection resources the same as regular resources. Internally,
+ * regular resources go to the executor, and remote connection resources are
+ * handled locally in the controller.
+ */
+void lrm_state_disconnect_only(lrm_state_t * lrm_state);
+void lrm_state_disconnect(lrm_state_t * lrm_state);
+int controld_connect_local_executor(lrm_state_t *lrm_state);
+int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
+ int port, int timeout);
+int lrm_state_is_connected(lrm_state_t * lrm_state);
+int lrm_state_poke_connection(lrm_state_t * lrm_state);
+
+int lrm_state_get_metadata(lrm_state_t * lrm_state,
+ const char *class,
+ const char *provider,
+ const char *agent, char **output, enum lrmd_call_options options);
+int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms);
+int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms,
+ GHashTable *parameters, int *call_id);
+lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options);
+int lrm_state_register_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id,
+ const char *class,
+ const char *provider, const char *agent, enum lrmd_call_options options);
+int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options);
+
+// Functions used to manage remote executor connection resources
+void remote_lrm_op_callback(lrmd_event_data_t * op);
+gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
+lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
+int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms);
+int controld_execute_remote_agent(const lrm_state_t *lrm_state,
+ const char *rsc_id, const char *action,
+ const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, lrmd_key_value_t *params,
+ int *call_id);
+void remote_ra_cleanup(lrm_state_t * lrm_state);
+void remote_ra_fail(const char *node_name);
+void remote_ra_process_pseudo(xmlNode *xml);
+gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state);
+void remote_ra_process_maintenance_nodes(xmlNode *xml);
+gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state);
+
+void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
+ active_op_t *pending, const xmlNode *action_xml);
+void controld_ack_event_directly(const char *to_host, const char *to_sys,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, const char *rsc_id);
+void controld_rc2event(lrmd_event_data_t *event, int rc);
+void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id);
+
+#endif
diff --git a/daemons/controld/controld_matrix.c b/daemons/controld/controld_matrix.c
new file mode 100644
index 0000000..a404f0a
--- /dev/null
+++ b/daemons/controld/controld_matrix.c
@@ -0,0 +1,1250 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdint.h> // uint64_t
+
+#include <pacemaker-controld.h>
+
+/*
+ * The state transition table. The rows are inputs, and
+ * the columns are states.
+ */
+static const enum crmd_fsa_state fsa_next_states[MAXINPUT][MAXSTATE] = {
+/* Got an I_NULL */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_CIB_OP */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_CIB_UPDATE */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_DC_TIMEOUT */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_ELECTION,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RECOVERY,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_ELECTION,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_ELECTION,
+ },
+
+/* Got an I_ELECTION */
+ {
+ /* S_IDLE ==> */ S_ELECTION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_ELECTION,
+ /* S_FINALIZE_JOIN ==> */ S_ELECTION,
+ /* S_NOT_DC ==> */ S_ELECTION,
+ /* S_POLICY_ENGINE ==> */ S_ELECTION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_ELECTION,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_ELECTION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PE_CALC */
+ {
+ /* S_IDLE ==> */ S_POLICY_ENGINE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_DC */
+ {
+ /* S_IDLE ==> */ S_RELEASE_DC,
+ /* S_ELECTION ==> */ S_RELEASE_DC,
+ /* S_INTEGRATION ==> */ S_RELEASE_DC,
+ /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RELEASE_DC,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ELECTION_DC */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_INTEGRATION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_INTEGRATION,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ERROR */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RECOVERY,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RECOVERY,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_TERMINATE,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_RECOVERY,
+ },
+
+/* Got an I_FAIL */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RELEASE_DC,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_STOPPING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_RELEASE_DC,
+ },
+
+/* Got an I_INTEGRATED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_FINALIZE_JOIN,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_FINALIZED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_NODE_JOIN */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_NOT_DC */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_NOT_DC,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RECOVERED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_PENDING,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_FAIL */
+ {
+ /* S_IDLE ==> */ S_STOPPING,
+ /* S_ELECTION ==> */ S_STOPPING,
+ /* S_INTEGRATION ==> */ S_STOPPING,
+ /* S_FINALIZE_JOIN ==> */ S_STOPPING,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_STOPPING,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_STOPPING,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_STOPPING,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_PENDING,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RESTART */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_TE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_IDLE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ROUTER */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_SHUTDOWN */
+ {
+ /* S_IDLE ==> */ S_POLICY_ENGINE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_ELECTION,
+ },
+
+/* Got an I_STOP */
+ {
+ /* S_IDLE ==> */ S_STOPPING,
+ /* S_ELECTION ==> */ S_STOPPING,
+ /* S_INTEGRATION ==> */ S_STOPPING,
+ /* S_FINALIZE_JOIN ==> */ S_STOPPING,
+ /* S_NOT_DC ==> */ S_STOPPING,
+ /* S_POLICY_ENGINE ==> */ S_STOPPING,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_STOPPING,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_STOPPING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_STOPPING,
+ /* S_HALT ==> */ S_STOPPING,
+ },
+
+/* Got an I_TERMINATE */
+ {
+ /* S_IDLE ==> */ S_TERMINATE,
+ /* S_ELECTION ==> */ S_TERMINATE,
+ /* S_INTEGRATION ==> */ S_TERMINATE,
+ /* S_FINALIZE_JOIN ==> */ S_TERMINATE,
+ /* S_NOT_DC ==> */ S_TERMINATE,
+ /* S_POLICY_ENGINE ==> */ S_TERMINATE,
+ /* S_RECOVERY ==> */ S_TERMINATE,
+ /* S_RELEASE_DC ==> */ S_TERMINATE,
+ /* S_STARTING ==> */ S_TERMINATE,
+ /* S_PENDING ==> */ S_TERMINATE,
+ /* S_STOPPING ==> */ S_TERMINATE,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TERMINATE,
+ /* S_HALT ==> */ S_TERMINATE,
+ },
+
+/* Got an I_STARTUP */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RECOVERY,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_OFFER */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_REQUEST */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_RESULT */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_WAIT_FOR_EVENT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_DC_HEARTBEAT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_LRM_EVENT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PENDING */
+ {
+ /* S_IDLE ==> */ S_PENDING,
+ /* S_ELECTION ==> */ S_PENDING,
+ /* S_INTEGRATION ==> */ S_PENDING,
+ /* S_FINALIZE_JOIN ==> */ S_PENDING,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_PENDING,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_PENDING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_PENDING,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_HALT */
+ {
+ /* S_IDLE ==> */ S_HALT,
+ /* S_ELECTION ==> */ S_HALT,
+ /* S_INTEGRATION ==> */ S_HALT,
+ /* S_FINALIZE_JOIN ==> */ S_HALT,
+ /* S_NOT_DC ==> */ S_HALT,
+ /* S_POLICY_ENGINE ==> */ S_HALT,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_HALT,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_HALT,
+ /* S_HALT ==> */ S_HALT,
+ },
+};
+
+/*
+ * The action table. Each entry is a set of actions to take, OR'ed
+ * together. Like the state table, the rows are inputs and the
+ * columns are states.
+ */
+
+/* NOTE: In the FSA, the actions are extracted first, then the state is updated. */
+
+static const uint64_t fsa_actions[MAXINPUT][MAXSTATE] = {
+
+/* Got an I_NULL */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_NOTHING,
+ /* S_INTEGRATION ==> */ A_NOTHING,
+ /* S_FINALIZE_JOIN ==> */ A_NOTHING,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_NOTHING,
+ /* S_PENDING ==> */ A_NOTHING,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_NOTHING,
+ },
+
+/* Got an I_CIB_OP */
+ {
+ /* S_IDLE ==> */ A_ERROR,
+ /* S_ELECTION ==> */ A_ERROR,
+ /* S_INTEGRATION ==> */ A_ERROR,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR,
+ /* S_NOT_DC ==> */ A_ERROR,
+ /* S_POLICY_ENGINE ==> */ A_ERROR,
+ /* S_RECOVERY ==> */ A_ERROR,
+ /* S_RELEASE_DC ==> */ A_ERROR,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_ERROR,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR,
+ /* S_HALT ==> */ A_ERROR,
+ },
+
+/* Got an I_CIB_UPDATE */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_LOG,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_DC_TIMEOUT */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_ELECTION_VOTE | A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_ELECTION_VOTE | A_WARN,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL | A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ELECTION */
+ {
+ /* S_IDLE ==> */ A_ELECTION_VOTE,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_ELECTION_VOTE,
+ /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE,
+ /* S_NOT_DC ==> */ A_ELECTION_VOTE,
+ /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_ELECTION_VOTE,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE,
+ /* S_HALT ==> */ A_ELECTION_VOTE,
+ },
+
+/* Got an I_PE_CALC */
+ {
+ /* S_IDLE ==> */ A_PE_INVOKE,
+ /* S_ELECTION ==> */ A_NOTHING,
+ /* S_INTEGRATION ==> */ A_NOTHING,
+ /* S_FINALIZE_JOIN ==> */ A_NOTHING,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_PE_INVOKE,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE,
+ /* S_HALT ==> */ A_ERROR,
+ },
+
+/* Got an I_RELEASE_DC */
+ {
+ /* S_IDLE ==> */ O_RELEASE,
+ /* S_ELECTION ==> */ O_RELEASE,
+ /* S_INTEGRATION ==> */ O_RELEASE | A_WARN,
+ /* S_FINALIZE_JOIN ==> */ O_RELEASE | A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ O_RELEASE | A_WARN,
+ /* S_RECOVERY ==> */ O_RELEASE,
+ /* S_RELEASE_DC ==> */ O_RELEASE | A_WARN,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ O_RELEASE | A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ELECTION_DC */
+ {
+ /* S_IDLE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_ELECTION ==> */
+ A_LOG | A_DC_TAKEOVER | A_PE_START | A_TE_START | A_DC_JOIN_OFFER_ALL | A_DC_TIMER_STOP,
+ /* S_INTEGRATION ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL,
+ /* S_NOT_DC ==> */ A_LOG | A_ELECTION_VOTE,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_STARTING ==> */ A_LOG | A_WARN,
+ /* S_PENDING ==> */ A_LOG | A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ERROR */
+ {
+ /* S_IDLE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_ELECTION ==> */ A_ERROR | A_RECOVER | O_RELEASE,
+ /* S_INTEGRATION ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_NOT_DC ==> */ A_ERROR | A_RECOVER,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_ERROR | A_RECOVER,
+ /* S_STARTING ==> */ A_ERROR | A_RECOVER,
+ /* S_PENDING ==> */ A_ERROR | A_RECOVER,
+ /* S_STOPPING ==> */ A_ERROR | A_EXIT_1,
+ /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_HALT ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ },
+
+/* Got an I_FAIL */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_WARN | A_DC_JOIN_OFFER_ALL,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_DC_JOIN_OFFER_ALL | A_TE_CANCEL,
+ /* S_RECOVERY ==> */ A_WARN | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | O_LRM_RECONNECT | A_PE_INVOKE | A_TE_CANCEL,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_INTEGRATED */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_FINALIZED */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL | A_TE_CANCEL,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_NODE_JOIN */
+ {
+ /* S_IDLE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_NOT_DC */
+ {
+ /* S_IDLE ==> */ A_WARN | O_RELEASE,
+ /* S_ELECTION ==> */ A_ERROR | A_ELECTION_START | A_DC_TIMER_STOP,
+ /* S_INTEGRATION ==> */ A_ERROR | O_RELEASE,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE,
+ /* S_NOT_DC ==> */ A_LOG,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | O_RELEASE,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_LOG | A_DC_TIMER_STOP,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_RELEASE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RECOVERED */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RELEASE_FAIL */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RELEASE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RESTART */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_LOG | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_LOG | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_LOG | A_DC_JOIN_FINALIZE,
+ /* S_NOT_DC ==> */ A_LOG | A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_LOG | A_PE_INVOKE,
+ /* S_RECOVERY ==> */ A_LOG | A_RECOVER | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_LOG | O_RELEASE,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG | A_TE_INVOKE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_TE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_ERROR,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_RECOVER | A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_ERROR,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ROUTER */
+ {
+ /* S_IDLE ==> */ A_MSG_ROUTE,
+ /* S_ELECTION ==> */ A_MSG_ROUTE,
+ /* S_INTEGRATION ==> */ A_MSG_ROUTE,
+ /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE,
+ /* S_NOT_DC ==> */ A_MSG_ROUTE,
+ /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE,
+ /* S_RECOVERY ==> */ A_MSG_ROUTE,
+ /* S_RELEASE_DC ==> */ A_MSG_ROUTE,
+ /* S_STARTING ==> */ A_MSG_ROUTE,
+ /* S_PENDING ==> */ A_MSG_ROUTE,
+ /* S_STOPPING ==> */ A_MSG_ROUTE,
+ /* S_TERMINATE ==> */ A_MSG_ROUTE,
+ /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE,
+ /* S_HALT ==> */ A_WARN | A_MSG_ROUTE,
+ },
+
+/* Got an I_SHUTDOWN */
+ {
+ /* S_IDLE ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_ELECTION ==> */ A_LOG | A_SHUTDOWN_REQ | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_FINALIZE_JOIN ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_NOT_DC ==> */ A_SHUTDOWN_REQ,
+ /* S_POLICY_ENGINE ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_RECOVERY ==> */ A_WARN | O_EXIT | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_STARTING ==> */ A_WARN | O_EXIT,
+ /* S_PENDING ==> */ A_SHUTDOWN_REQ,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_HALT ==> */ A_WARN | A_ELECTION_START | A_SHUTDOWN_REQ,
+ },
+
+/* Got an I_STOP */
+ {
+ /* S_IDLE ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_ELECTION ==> */ O_RELEASE | O_EXIT,
+ /* S_INTEGRATION ==> */ A_WARN | O_RELEASE | O_EXIT,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_NOT_DC ==> */ O_EXIT,
+ /* S_POLICY_ENGINE ==> */ A_WARN | O_RELEASE | O_EXIT,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_STARTING ==> */ O_EXIT,
+ /* S_PENDING ==> */ O_EXIT,
+ /* S_STOPPING ==> */ O_EXIT,
+ /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG | O_RELEASE | O_EXIT,
+ /* S_HALT ==> */ O_RELEASE | O_EXIT | A_WARN,
+ },
+
+/* Got an I_TERMINATE */
+ {
+ /* S_IDLE ==> */ A_ERROR | O_EXIT,
+ /* S_ELECTION ==> */ A_ERROR | O_EXIT,
+ /* S_INTEGRATION ==> */ A_ERROR | O_EXIT,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_EXIT,
+ /* S_NOT_DC ==> */ A_ERROR | O_EXIT,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | O_EXIT,
+ /* S_RECOVERY ==> */ A_ERROR | O_EXIT,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_EXIT,
+ /* S_STARTING ==> */ O_EXIT,
+ /* S_PENDING ==> */ A_ERROR | O_EXIT,
+ /* S_STOPPING ==> */ O_EXIT,
+ /* S_TERMINATE ==> */ O_EXIT,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_EXIT,
+ /* S_HALT ==> */ A_ERROR | O_EXIT,
+ },
+
+/* Got an I_STARTUP */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */
+ A_LOG | A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_PE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_TE_INVOKE,
+ /* S_RECOVERY ==> */ A_RECOVER | A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_OFFER */
+ {
+ /* S_IDLE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST,
+ /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST,
+ /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_RECOVERY ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_RELEASE_DC ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_REQUEST */
+ {
+ /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_RESULT */
+ {
+ /* S_IDLE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_LOG | A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK,
+ /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK,
+ /* S_NOT_DC ==> */ A_ERROR | A_CL_JOIN_ANNOUNCE,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_CL_JOIN_RESULT,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_WAIT_FOR_EVENT */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_LOG,
+ /* S_FINALIZE_JOIN ==> */ A_LOG,
+ /* S_NOT_DC ==> */ A_LOG,
+ /* S_POLICY_ENGINE ==> */ A_LOG,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_DC_HEARTBEAT */
+ {
+ /* S_IDLE ==> */ A_ERROR,
+ /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_ERROR,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_ERROR,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG | A_CL_JOIN_ANNOUNCE,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_LRM_EVENT */
+ {
+ /* S_IDLE ==> */ A_LRM_EVENT,
+ /* S_ELECTION ==> */ A_LRM_EVENT,
+ /* S_INTEGRATION ==> */ A_LRM_EVENT,
+ /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT,
+ /* S_NOT_DC ==> */ A_LRM_EVENT,
+ /* S_POLICY_ENGINE ==> */ A_LRM_EVENT,
+ /* S_RECOVERY ==> */ A_LRM_EVENT,
+ /* S_RELEASE_DC ==> */ A_LRM_EVENT,
+ /* S_STARTING ==> */ A_LRM_EVENT,
+ /* S_PENDING ==> */ A_LRM_EVENT,
+ /* S_STOPPING ==> */ A_LRM_EVENT,
+ /* S_TERMINATE ==> */ A_LRM_EVENT,
+ /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */
+/* Got an I_PENDING */
+ {
+ /* S_IDLE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_ELECTION ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_INTEGRATION ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_FINALIZE_JOIN ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_NOT_DC ==> */ A_LOG | O_DC_TIMER_RESTART,
+ /* S_POLICY_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN | O_DC_TIMER_RESTART,
+ /* S_STARTING ==> */ A_LOG | A_DC_TIMER_START | A_CL_JOIN_QUERY,
+ /* S_PENDING ==> */ A_LOG | O_DC_TIMER_RESTART,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_HALT */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+};
+
+/*!
+ * \internal
+ * \brief Get the next FSA state given an input and the current state
+ *
+ * \param[in] input FSA input
+ *
+ * \return The next FSA state
+ */
+enum crmd_fsa_state
+controld_fsa_get_next_state(enum crmd_fsa_input input)
+{
+ return fsa_next_states[input][controld_globals.fsa_state];
+}
+
+/*!
+ * \internal
+ * \brief Get the appropriate FSA action given an input and the current state
+ *
+ * \param[in] input FSA input
+ *
+ * \return The appropriate FSA action
+ */
+uint64_t
+controld_fsa_get_action(enum crmd_fsa_input input)
+{
+ return fsa_actions[input][controld_globals.fsa_state];
+}
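+
+/* For example, reading the two tables above: with the controller in
+ * S_POLICY_ENGINE, an I_PE_SUCCESS input yields A_TE_INVOKE from
+ * controld_fsa_get_action() and S_TRANSITION_ENGINE from
+ * controld_fsa_get_next_state(). An illustrative (not actual) caller:
+ *
+ *   uint64_t actions = controld_fsa_get_action(I_PE_SUCCESS);
+ *   enum crmd_fsa_state next = controld_fsa_get_next_state(I_PE_SUCCESS);
+ *   // perform the extracted actions first, then update the current state
+ */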
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
new file mode 100644
index 0000000..1f7e4c0
--- /dev/null
+++ b/daemons/controld/controld_membership.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+/* put these first so that uuid_t is defined without conflicts */
+#include <crm_internal.h>
+
+#include <string.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <pacemaker-controld.h>
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+static void
+reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
+{
+ crm_node_t *node = value;
+
+ if (crm_is_peer_active(node) == FALSE) {
+ crm_update_peer_join(__func__, node, crm_join_none);
+
+ if(node && node->uname) {
+ if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
+ pcmk__str_casei)) {
+ crm_err("We're not part of the cluster anymore");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ } else if (!AM_I_DC
+ && pcmk__str_eq(node->uname, controld_globals.dc_name,
+ pcmk__str_casei)) {
+ crm_warn("Our DC node (%s) left the cluster", node->uname);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ }
+ }
+
+ if ((controld_globals.fsa_state == S_INTEGRATION)
+ || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ if ((node != NULL) && (node->uuid != NULL)) {
+ fail_incompletable_actions(controld_globals.transition_graph,
+ node->uuid);
+ }
+ }
+}
+
+void
+post_cache_update(int instance)
+{
+ xmlNode *no_op = NULL;
+
+ crm_peer_seq = instance;
+ crm_debug("Updated cache after membership event %d.", instance);
+
+ g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
+ controld_set_fsa_input_flags(R_MEMBERSHIP);
+
+ if (AM_I_DC) {
+ populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
+ node_update_expected, __func__);
+ }
+
+    /*
+     * If we lost nodes, we should re-check the election status.
+     * This is safe to call outside of an election.
+     */
+ controld_set_fsa_action_flags(A_ELECTION_CHECK);
+ controld_trigger_fsa();
+
+    /* Membership changed, so remind everyone we're here.
+     * This aids detection of duplicate DCs.
+     */
+ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
+ AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
+ send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
+ free_xml(no_op);
+}
+
+static void
+crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_trace("Node update %d complete", call_id);
+
+ } else if(call_id < pcmk_ok) {
+ crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ } else {
+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create an XML node state tag with updates
+ *
+ * \param[in,out] node Node whose state will be used for update
+ * \param[in] flags Bitmask of node_update_flags indicating what to update
+ * \param[in,out] parent XML node to contain update (or NULL)
+ * \param[in] source Who requested the update (only used for logging)
+ *
+ * \return Pointer to created node state tag
+ */
+xmlNode *
+create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
+ const char *source)
+{
+ const char *value = NULL;
+ xmlNode *node_state;
+
+ if (!node->state) {
+ crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
+ return NULL;
+ }
+
+ node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
+
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true);
+ }
+
+ set_uuid(node_state, XML_ATTR_ID, node);
+
+ if (crm_element_value(node_state, XML_ATTR_ID) == NULL) {
+ crm_info("Node update for %s cancelled: no id", node->uname);
+ free_xml(node_state);
+ return NULL;
+ }
+
+ crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
+
+ if ((flags & node_update_cluster) && node->state) {
+ pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER,
+ pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei));
+ }
+
+ if (!pcmk_is_set(node->flags, crm_remote_node)) {
+ if (flags & node_update_peer) {
+ value = OFFLINESTATUS;
+ if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
+ value = ONLINESTATUS;
+ }
+ crm_xml_add(node_state, XML_NODE_IS_PEER, value);
+ }
+
+ if (flags & node_update_join) {
+ if (node->join <= crm_join_none) {
+ value = CRMD_JOINSTATE_DOWN;
+ } else {
+ value = CRMD_JOINSTATE_MEMBER;
+ }
+ crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
+ }
+
+ if (flags & node_update_expected) {
+ crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
+ }
+ }
+
+ crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
+
+ return node_state;
+}
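+
+/* As an illustrative sketch (values hypothetical), a full update for a
+ * cluster node might produce a node_state entry such as:
+ *
+ *   <node_state id="1" uname="cluster1" in_ccm="true" crmd="online"
+ *               join="member" expected="member"
+ *               crm-debug-origin="post_cache_update"/>
+ *
+ * The attribute names shown are assumed expansions of the XML_* macros used
+ * above; remote nodes additionally get remote_node="true" and skip the
+ * peer, join, and expected attributes.
+ */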
+
+static void
+remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
+ xmlNode * output, void *user_data)
+{
+ char *node_uuid = user_data;
+
+ do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
+ "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
+ node_uuid, pcmk_strerror(rc), rc);
+}
+
+static void
+search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
+ xmlNode * output, void *user_data)
+{
+ char *new_node_uuid = user_data;
+ xmlNode *node_xml = NULL;
+
+ if (rc != pcmk_ok) {
+ if (rc != -ENXIO) {
+ crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
+ new_node_uuid, pcmk_strerror(rc), rc);
+ }
+ return;
+
+ } else if (output == NULL) {
+ return;
+ }
+
+ if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) {
+ node_xml = output;
+
+ } else {
+ node_xml = pcmk__xml_first_child(output);
+ }
+
+ for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) {
+ const char *node_uuid = NULL;
+ const char *node_uname = NULL;
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ gboolean known = FALSE;
+
+ if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) {
+ continue;
+ }
+
+ node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
+ node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
+
+ if (node_uuid == NULL || node_uname == NULL) {
+ continue;
+ }
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ if (node->uuid
+ && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
+ && node->uname
+ && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {
+
+ known = TRUE;
+ break;
+ }
+ }
+
+ if (known == FALSE) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+ int delete_call_id = 0;
+ xmlNode *node_state_xml = NULL;
+
+ crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
+ node_uuid, node_uname, new_node_uuid);
+
+ delete_call_id = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_NODES,
+ node_xml, cib_scope_local);
+ fsa_register_cib_callback(delete_call_id, strdup(node_uuid),
+ remove_conflicting_node_callback);
+
+ node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
+ crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
+ crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
+
+ delete_call_id = cib_conn->cmds->remove(cib_conn,
+ XML_CIB_TAG_STATUS,
+ node_state_xml,
+ cib_scope_local);
+ fsa_register_cib_callback(delete_call_id, strdup(node_uuid),
+ remove_conflicting_node_callback);
+ free_xml(node_state_xml);
+ }
+ }
+}
+
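+/*!
+ * \internal
+ * \brief Handle the result of a CIB node list update
+ *
+ * Log the result, and register an FSA error input if the update failed.
+ *
+ * \note A CIB callback, registered via controld_update_cib()
+ */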
+static void
+node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+    if (call_id < pcmk_ok) {
+        crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
+        crm_log_xml_debug(msg, "update:failed");
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+    } else if (rc < pcmk_ok) {
+        crm_err("Node list update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+        crm_log_xml_debug(msg, "update:failed");
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
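+/*!
+ * \internal
+ * \brief Ensure the CIB reflects current cluster membership
+ *
+ * Rebuild the CIB's <nodes> section from the cluster layer (or the peer cache
+ * as a fallback), queueing removal of any conflicting entries, and if we are
+ * DC, rebuild the <status> section from the peer caches as well.
+ *
+ * \param[in] flags   Bitmask of node_update_flags indicating what to update
+ * \param[in] source  Who requested the update (only used for logging)
+ */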
+void
+populate_cib_nodes(enum node_update_flags flags, const char *source)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ int call_id = 0;
+ gboolean from_hashtable = TRUE;
+ xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
+
+#if SUPPORT_COROSYNC
+ if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) {
+ from_hashtable = pcmk__corosync_add_nodes(node_list);
+ }
+#endif
+
+ if (from_hashtable) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ GString *xpath = NULL;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *new_node = NULL;
+
+ if ((node->uuid != NULL) && (node->uname != NULL)) {
+ crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
+ if (xpath == NULL) {
+ xpath = g_string_sized_new(512);
+ } else {
+ g_string_truncate(xpath, 0);
+ }
+
+ /* We need both to be valid */
+ new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
+ crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
+ crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
+
+ /* Search and remove unknown nodes with the conflicting uname from CIB */
+ pcmk__g_strcat(xpath,
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
+ "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
+ "[@" XML_ATTR_UNAME "='", node->uname, "']"
+ "[@" XML_ATTR_ID "!='", node->uuid, "']", NULL);
+
+ call_id = cib_conn->cmds->query(cib_conn,
+ (const char *) xpath->str,
+ NULL,
+ cib_scope_local|cib_xpath);
+ fsa_register_cib_callback(call_id, strdup(node->uuid),
+ search_conflicting_node_callback);
+ }
+ }
+
+ if (xpath != NULL) {
+ g_string_free(xpath, TRUE);
+ }
+ }
+
+ crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
+
+ if ((controld_update_cib(XML_CIB_TAG_NODES, node_list, cib_scope_local,
+ node_list_update_callback) == pcmk_rc_ok)
+ && (crm_peer_cache != NULL) && AM_I_DC) {
+ /*
+ * There is no need to update the local CIB with our values if
+ * we've not seen valid membership data
+ */
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+
+ free_xml(node_list);
+ node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ create_node_state_update(node, flags, node_list, source);
+ }
+
+ if (crm_remote_peer_cache) {
+ g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ create_node_state_update(node, flags, node_list, source);
+ }
+ }
+
+ controld_update_cib(XML_CIB_TAG_STATUS, node_list, cib_scope_local,
+ crmd_node_update_complete);
+ }
+ free_xml(node_list);
+}
+
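+/*!
+ * \internal
+ * \brief Handle the result of a CIB quorum update
+ *
+ * \note A CIB callback, registered via controld_update_cib()
+ */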
+static void
+cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_trace("Quorum update %d complete", call_id);
+
+ } else {
+ crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
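+/*!
+ * \internal
+ * \brief Track the cluster's quorum state, updating the CIB if we are DC
+ *
+ * \param[in] quorum        Whether the cluster partition now has quorum
+ * \param[in] force_update  If TRUE, update the CIB even if quorum is unchanged
+ */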
+void
+crm_update_quorum(gboolean quorum, gboolean force_update)
+{
+ bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);
+
+ if (quorum) {
+ controld_set_global_flags(controld_ever_had_quorum);
+
+ } else if (pcmk_all_flags_set(controld_globals.flags,
+ controld_ever_had_quorum
+ |controld_no_quorum_suicide)) {
+ pcmk__panic(__func__);
+ }
+
+ if (AM_I_DC
+ && ((has_quorum && !quorum) || (!has_quorum && quorum)
+ || force_update)) {
+ xmlNode *update = NULL;
+
+ update = create_xml_node(NULL, XML_TAG_CIB);
+ crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
+ crm_xml_add(update, XML_ATTR_DC_UUID, controld_globals.our_uuid);
+
+ crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
+ controld_update_cib(XML_TAG_CIB, update, cib_scope_local,
+ cib_quorum_update_complete);
+ free_xml(update);
+
+ /* Quorum changes usually cause a new transition via other activity:
+ * quorum gained via a node joining will abort via the node join,
+ * and quorum lost via a node leaving will usually abort via resource
+ * activity and/or fencing.
+ *
+ * However, it is possible that nothing else causes a transition (e.g.
+     * someone forces quorum via corosync-cmapctl, or quorum is lost due to
+ * a node in standby shutting down cleanly), so here ensure a new
+ * transition is triggered.
+ */
+ if (quorum) {
+ /* If quorum was gained, abort after a short delay, in case multiple
+ * nodes are joining around the same time, so the one that brings us
+ * to quorum doesn't cause all the remaining ones to be fenced.
+ */
+ abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
+ 5000);
+ } else {
+ abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
+ NULL);
+ }
+ }
+
+ if (quorum) {
+ controld_set_global_flags(controld_has_quorum);
+ } else {
+ controld_clear_global_flags(controld_has_quorum);
+ }
+}
diff --git a/daemons/controld/controld_membership.h b/daemons/controld/controld_membership.h
new file mode 100644
index 0000000..cfe8cee
--- /dev/null
+++ b/daemons/controld/controld_membership.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef MEMBERSHIP__H
+# define MEMBERSHIP__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <crm/cluster/internal.h>
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+void controld_destroy_failed_sync_table(void);
+void controld_remove_failed_sync_node(const char *node_name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
new file mode 100644
index 0000000..54b27ec
--- /dev/null
+++ b/daemons/controld/controld_messages.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cib.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+extern void crm_shutdown(int nsig);
+
+static enum crmd_fsa_input handle_message(xmlNode *msg,
+ enum crmd_fsa_cause cause);
+static void handle_response(xmlNode *stored_msg);
+static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
+ enum crmd_fsa_cause cause);
+static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
+static void send_msg_via_ipc(xmlNode * msg, const char *sys);
+
+/* debug only, can wrap all it likes */
+static int last_data_id = 0;
+
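+/*!
+ * \internal
+ * \brief Register an FSA error input
+ *
+ * Any actions currently registered are first requeued as a separate input so
+ * they are not lost, then the error input is registered with no actions.
+ */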
+void
+register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ fsa_data_t * cur_data, void *new_data, const char *raised_from)
+{
+ /* save the current actions if any */
+ if (controld_globals.fsa_actions != A_NOTHING) {
+ register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
+ I_NULL, cur_data ? cur_data->data : NULL,
+ controld_globals.fsa_actions, TRUE, __func__);
+ }
+
+ /* reset the action list */
+ crm_info("Resetting the current action list");
+ fsa_dump_actions(controld_globals.fsa_actions, "Drop");
+ controld_globals.fsa_actions = A_NOTHING;
+
+ /* register the error */
+ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
+}
+
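+/*!
+ * \internal
+ * \brief Add an input to the FSA message queue
+ *
+ * \param[in] cause         Cause of the input
+ * \param[in] input         Input to add
+ * \param[in] data          Ancillary data for the input (type depends on cause)
+ * \param[in] with_actions  Actions to register along with the input
+ * \param[in] prepend       If TRUE, add the input to the front of the queue
+ * \param[in] raised_from   Name of the function that raised the input
+ */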
+void
+register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ void *data, uint64_t with_actions,
+ gboolean prepend, const char *raised_from)
+{
+ unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
+ fsa_data_t *fsa_data = NULL;
+
+ if (raised_from == NULL) {
+ raised_from = "<unknown>";
+ }
+
+ if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
+ /* no point doing anything */
+ crm_err("Cannot add entry to queue: no input and no action");
+ return;
+ }
+
+ if (input == I_WAIT_FOR_EVENT) {
+ controld_set_global_flags(controld_fsa_is_stalled);
+ crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
+ raised_from, fsa_cause2string(cause), data, old_len);
+
+ if (old_len > 0) {
+ fsa_dump_queue(LOG_TRACE);
+ prepend = FALSE;
+ }
+
+ if (data == NULL) {
+ controld_set_fsa_action_flags(with_actions);
+ fsa_dump_actions(with_actions, "Restored");
+ return;
+ }
+
+ /* Store everything in the new event and reset
+ * controld_globals.fsa_actions
+ */
+ with_actions |= controld_globals.fsa_actions;
+ controld_globals.fsa_actions = A_NOTHING;
+ }
+
+ last_data_id++;
+ crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
+ raised_from, (prepend? "prepended" : "appended"), last_data_id,
+ fsa_input2string(input), fsa_cause2string(cause),
+ (data? "with" : "without"));
+
+    fsa_data = calloc(1, sizeof(fsa_data_t));
+    CRM_ASSERT(fsa_data != NULL);
+ fsa_data->id = last_data_id;
+ fsa_data->fsa_input = input;
+ fsa_data->fsa_cause = cause;
+ fsa_data->origin = raised_from;
+ fsa_data->data = NULL;
+ fsa_data->data_type = fsa_dt_none;
+ fsa_data->actions = with_actions;
+
+ if (with_actions != A_NOTHING) {
+ crm_trace("Adding actions %.16llx to input",
+ (unsigned long long) with_actions);
+ }
+
+ if (data != NULL) {
+ switch (cause) {
+ case C_FSA_INTERNAL:
+ case C_CRMD_STATUS_CALLBACK:
+ case C_IPC_MESSAGE:
+ case C_HA_MESSAGE:
+ CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
+ crm_err("Bogus data from %s", raised_from));
+ crm_trace("Copying %s data from %s as cluster message data",
+ fsa_cause2string(cause), raised_from);
+ fsa_data->data = copy_ha_msg_input(data);
+ fsa_data->data_type = fsa_dt_ha_msg;
+ break;
+
+ case C_LRM_OP_CALLBACK:
+ crm_trace("Copying %s data from %s as lrmd_event_data_t",
+ fsa_cause2string(cause), raised_from);
+ fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
+ fsa_data->data_type = fsa_dt_lrm;
+ break;
+
+ case C_TIMER_POPPED:
+ case C_SHUTDOWN:
+ case C_UNKNOWN:
+ case C_STARTUP:
+ crm_crit("Copying %s data (from %s) is not yet implemented",
+ fsa_cause2string(cause), raised_from);
+ crmd_exit(CRM_EX_SOFTWARE);
+ break;
+ }
+ }
+
+ /* make sure to free it properly later */
+ if (prepend) {
+ controld_globals.fsa_message_queue
+ = g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
+ } else {
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
+ }
+
+ crm_trace("FSA message queue length is %d",
+ g_list_length(controld_globals.fsa_message_queue));
+
+ /* fsa_dump_queue(LOG_TRACE); */
+
+ if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
+ crm_err("Couldn't add message to the queue");
+ }
+
+ if (input != I_WAIT_FOR_EVENT) {
+ controld_trigger_fsa();
+ }
+}
+
+void
+fsa_dump_queue(int log_level)
+{
+ int offset = 0;
+
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *data = (fsa_data_t *) iter->data;
+
+ do_crm_log_unlikely(log_level,
+ "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
+ offset++, data->id, fsa_input2string(data->fsa_input),
+ data->origin, data->data, data->data_type,
+ fsa_cause2string(data->fsa_cause));
+ }
+}
+
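+/*!
+ * \internal
+ * \brief Create a deep copy of a cluster message input
+ *
+ * \param[in] orig Input to copy (may be NULL)
+ *
+ * \return Newly allocated copy of \p orig
+ */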
+ha_msg_input_t *
+copy_ha_msg_input(ha_msg_input_t * orig)
+{
+ ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
+
+ CRM_ASSERT(copy != NULL);
+ copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
+ copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
+ return copy;
+}
+
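+/*!
+ * \internal
+ * \brief Free an FSA input, including any message data attached to it
+ *
+ * \param[in,out] fsa_data Input to free (may be NULL)
+ */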
+void
+delete_fsa_input(fsa_data_t * fsa_data)
+{
+ lrmd_event_data_t *op = NULL;
+ xmlNode *foo = NULL;
+
+ if (fsa_data == NULL) {
+ return;
+ }
+ crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
+
+ if (fsa_data->data != NULL) {
+ switch (fsa_data->data_type) {
+ case fsa_dt_ha_msg:
+ delete_ha_msg_input(fsa_data->data);
+ break;
+
+ case fsa_dt_xml:
+ foo = fsa_data->data;
+ free_xml(foo);
+ break;
+
+ case fsa_dt_lrm:
+ op = (lrmd_event_data_t *) fsa_data->data;
+ lrmd_free_event(op);
+ break;
+
+ case fsa_dt_none:
+ if (fsa_data->data != NULL) {
+ crm_err("Don't know how to free %s data from %s",
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ crmd_exit(CRM_EX_SOFTWARE);
+ }
+ break;
+ }
+ crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
+ }
+
+ free(fsa_data);
+}
+
+/* returns the next message */
+fsa_data_t *
+get_message(void)
+{
+ fsa_data_t *message
+ = (fsa_data_t *) controld_globals.fsa_message_queue->data;
+
+ controld_globals.fsa_message_queue
+ = g_list_remove(controld_globals.fsa_message_queue, message);
+ crm_trace("Processing input %d", message->id);
+ return message;
+}
+
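+/*!
+ * \internal
+ * \brief Get an FSA input's message data, checking that it has a given type
+ *
+ * \return Message data if it exists and matches \p a_type, otherwise NULL
+ *         (asserting on a type mismatch)
+ */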
+void *
+fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
+{
+ void *ret_val = NULL;
+
+ if (fsa_data == NULL) {
+ crm_err("%s: No FSA data available", caller);
+
+ } else if (fsa_data->data == NULL) {
+ crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
+
+ } else if (fsa_data->data_type != a_type) {
+ crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
+ caller, fsa_data->data_type, a_type, fsa_data->origin);
+ CRM_ASSERT(fsa_data->data_type == a_type);
+ } else {
+ ret_val = fsa_data->data;
+ }
+
+ return ret_val;
+}
+
+/* A_MSG_ROUTE */
+void
+do_msg_route(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+
+ route_message(msg_data->fsa_cause, input->msg);
+}
+
+void
+route_message(enum crmd_fsa_cause cause, xmlNode * input)
+{
+ ha_msg_input_t fsa_input;
+ enum crmd_fsa_input result = I_NULL;
+
+ fsa_input.msg = input;
+ CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
+
+ /* try passing the buck first */
+ if (relay_message(input, cause == C_IPC_MESSAGE)) {
+ return;
+ }
+
+ /* handle locally */
+ result = handle_message(input, cause);
+
+ /* done or process later? */
+ switch (result) {
+ case I_NULL:
+ case I_CIB_OP:
+ case I_ROUTER:
+ case I_NODE_JOIN:
+ case I_JOIN_REQUEST:
+ case I_JOIN_RESULT:
+ break;
+ default:
+            /* Deferring local processing of message */
+ register_fsa_input_later(cause, result, &fsa_input);
+ return;
+ }
+
+ if (result != I_NULL) {
+ /* add to the front of the queue */
+ register_fsa_input(cause, result, &fsa_input);
+ }
+}
+
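+/*!
+ * \internal
+ * \brief Route a message to another node or subsystem if appropriate
+ *
+ * \param[in] msg                  Message to route
+ * \param[in] originated_locally   Whether the message originated on this node
+ *
+ * \return TRUE if no further processing of the message is needed, or FALSE if
+ *         the caller should continue processing it locally
+ */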
+gboolean
+relay_message(xmlNode * msg, gboolean originated_locally)
+{
+ int dest = 1;
+ bool is_for_dc = false;
+ bool is_for_dcib = false;
+ bool is_for_te = false;
+ bool is_for_crm = false;
+ bool is_for_cib = false;
+ bool is_local = false;
+    const char *host_to = NULL;
+    const char *sys_to = NULL;
+    const char *sys_from = NULL;
+    const char *type = NULL;
+    const char *task = NULL;
+    const char *ref = NULL;
+
+    // Check for NULL before extracting any values from the message
+    if (msg == NULL) {
+        crm_warn("Cannot route empty message");
+        return TRUE;
+    }
+
+    host_to = crm_element_value(msg, F_CRM_HOST_TO);
+    sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+    sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+    type = crm_element_value(msg, F_TYPE);
+    task = crm_element_value(msg, F_CRM_TASK);
+    ref = crm_element_value(msg, XML_ATTR_REFERENCE);
+
+    if (ref == NULL) {
+        ref = "without reference ID";
+    }
+
+    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
+ crm_trace("No routing needed for hello message %s", ref);
+ return TRUE;
+
+ } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
+ crm_warn("Received invalid message %s: type '%s' not '" T_CRM "'",
+ ref, pcmk__s(type, ""));
+ crm_log_xml_warn(msg, "[bad message type]");
+ return TRUE;
+
+ } else if (sys_to == NULL) {
+ crm_warn("Received invalid message %s: no subsystem", ref);
+ crm_log_xml_warn(msg, "[no subsystem]");
+ return TRUE;
+ }
+
+ is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
+ is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
+ is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
+ is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
+ is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
+
+ is_local = false;
+ if (pcmk__str_empty(host_to)) {
+ if (is_for_dc || is_for_te) {
+ is_local = false;
+
+ } else if (is_for_crm) {
+ if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
+ PCMK__CONTROLD_CMD_NODES, NULL)) {
+ /* Node info requests do not specify a host, which is normally
+ * treated as "all hosts", because the whole point is that the
+ * client may not know the local node name. Always handle these
+ * requests locally.
+ */
+ is_local = true;
+ } else {
+ is_local = !originated_locally;
+ }
+
+ } else {
+ is_local = true;
+ }
+
+ } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
+ pcmk__str_casei)) {
+ is_local = true;
+ } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
+ xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
+ const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
+
+ if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+ // Local delete of an offline node's resource history
+ is_local = true;
+ }
+ }
+
+ if (is_for_dc || is_for_dcib || is_for_te) {
+ if (AM_I_DC && is_for_te) {
+ crm_trace("Route message %s locally as transition request", ref);
+ send_msg_via_ipc(msg, sys_to);
+
+ } else if (AM_I_DC) {
+ crm_trace("Route message %s locally as DC request", ref);
+ return FALSE; // More to be done by caller
+
+ } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
+ CRM_SYSTEM_TENGINE, NULL)) {
+
+ if (is_corosync_cluster()) {
+ dest = text2msg_type(sys_to);
+ }
+ crm_trace("Relay message %s to DC", ref);
+ send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
+
+ } else {
+ /* Neither the TE nor the scheduler should be sending messages
+ * to DCs on other nodes. By definition, if we are no longer the DC,
+ * then the scheduler's or TE's data should be discarded.
+ */
+ crm_trace("Discard message %s because we are not DC", ref);
+ }
+
+ } else if (is_local && (is_for_crm || is_for_cib)) {
+ crm_trace("Route message %s locally as controller request", ref);
+ return FALSE; // More to be done by caller
+
+ } else if (is_local) {
+ crm_trace("Relay message %s locally to %s",
+ ref, (sys_to? sys_to : "unknown client"));
+ crm_log_xml_trace(msg, "[IPC relay]");
+ send_msg_via_ipc(msg, sys_to);
+
+ } else {
+ crm_node_t *node_to = NULL;
+
+ if (is_corosync_cluster()) {
+ dest = text2msg_type(sys_to);
+
+ if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
+ dest = crm_msg_crmd;
+ }
+ }
+
+ if (host_to) {
+ node_to = pcmk__search_cluster_node_cache(0, host_to);
+ if (node_to == NULL) {
+ crm_warn("Cannot route message %s: Unknown node %s",
+ ref, host_to);
+ return TRUE;
+ }
+ crm_trace("Relay message %s to %s",
+ ref, (node_to->uname? node_to->uname : "peer"));
+ } else {
+ crm_trace("Broadcast message %s to all peers", ref);
+ }
+ send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
+ }
+
+ return TRUE; // No further processing of message is needed
+}
+
+// Return true if field contains a nonnegative integer
+static bool
+authorize_version(xmlNode *message_data, const char *field,
+ const char *client_name, const char *ref, const char *uuid)
+{
+ const char *version = crm_element_value(message_data, field);
+ long long version_num;
+
+ if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
+ || (version_num < 0LL)) {
+
+ crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
+ CRM_XS " ref=%s uuid=%s",
+ client_name, ((version == NULL)? "" : version),
+ field, (ref? ref : "none"), uuid);
+ return false;
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a client IPC message is acceptable
+ *
+ * If a given client IPC message is a hello, "authorize" it by ensuring it has
+ * valid information such as a protocol version, and return false indicating
+ * that nothing further needs to be done with the message. If the message is not
+ * a hello, just return true to indicate it needs further processing.
+ *
+ * \param[in] client_msg XML of IPC message
+ * \param[in,out] curr_client If IPC is not proxied, client that sent message
+ * \param[in] proxy_session If IPC is proxied, the session ID
+ *
+ * \return true if message needs further processing, false if it doesn't
+ */
+bool
+controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
+ const char *proxy_session)
+{
+ xmlNode *message_data = NULL;
+ const char *client_name = NULL;
+ const char *op = crm_element_value(client_msg, F_CRM_TASK);
+ const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE);
+ const char *uuid = (curr_client? curr_client->id : proxy_session);
+
+ if (uuid == NULL) {
+ crm_warn("IPC message from client rejected: No client identifier "
+ CRM_XS " ref=%s", (ref? ref : "none"));
+ goto rejected;
+ }
+
+ if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
+ // Only hello messages need to be authorized
+ return true;
+ }
+
+ message_data = get_message_xml(client_msg, F_CRM_DATA);
+
+ client_name = crm_element_value(message_data, "client_name");
+ if (pcmk__str_empty(client_name)) {
+ crm_warn("IPC hello from client rejected: No client name",
+ CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
+ goto rejected;
+ }
+ if (!authorize_version(message_data, "major_version", client_name, ref,
+ uuid)) {
+ goto rejected;
+ }
+ if (!authorize_version(message_data, "minor_version", client_name, ref,
+ uuid)) {
+ goto rejected;
+ }
+
+ crm_trace("Validated IPC hello from client %s", client_name);
+ if (curr_client) {
+ curr_client->userdata = strdup(client_name);
+ }
+ controld_trigger_fsa();
+ return false;
+
+rejected:
+ if (curr_client) {
+ qb_ipcs_disconnect(curr_client->ipcs);
+ }
+ return false;
+}
+
+static enum crmd_fsa_input
+handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
+{
+ const char *type = NULL;
+
+ CRM_CHECK(msg != NULL, return I_NULL);
+
+ type = crm_element_value(msg, F_CRM_MSG_TYPE);
+ if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) {
+ return handle_request(msg, cause);
+
+ } else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) {
+ handle_response(msg);
+ return I_NULL;
+ }
+
+ crm_err("Unknown message type: %s", type);
+ return I_NULL;
+}
+
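+/*!
+ * \internal
+ * \brief Handle a CRM_OP_CLEAR_FAILCOUNT request
+ *
+ * Clear the resource's failures from the attribute manager, the CIB, and the
+ * local executor state.
+ *
+ * \param[in] stored_msg Message XML
+ *
+ * \return Next FSA input
+ */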
+static enum crmd_fsa_input
+handle_failcount_op(xmlNode * stored_msg)
+{
+ const char *rsc = NULL;
+ const char *uname = NULL;
+ const char *op = NULL;
+ char *interval_spec = NULL;
+ guint interval_ms = 0;
+ gboolean is_remote_node = FALSE;
+ xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
+
+ if (xml_op) {
+ xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
+ xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
+
+ if (xml_rsc) {
+ rsc = ID(xml_rsc);
+ }
+ if (xml_attrs) {
+ op = crm_element_value(xml_attrs,
+ CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
+ crm_element_value_ms(xml_attrs,
+ CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL,
+ &interval_ms);
+ }
+ }
+ uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
+
+ if ((rsc == NULL) || (uname == NULL)) {
+ crm_log_xml_warn(stored_msg, "invalid failcount op");
+ return I_NULL;
+ }
+
+ if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
+ is_remote_node = TRUE;
+ }
+
+ crm_debug("Clearing failures for %s-interval %s on %s "
+ "from attribute manager, CIB, and executor state",
+ pcmk__readable_interval(interval_ms), rsc, uname);
+
+ if (interval_ms) {
+ interval_spec = crm_strdup_printf("%ums", interval_ms);
+ }
+ update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
+ free(interval_spec);
+
+ controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
+
+ lrm_clear_last_failure(rsc, uname, op, interval_ms);
+
+ return I_NULL;
+}
+
+static enum crmd_fsa_input
+handle_lrm_delete(xmlNode *stored_msg)
+{
+ const char *mode = NULL;
+ xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
+
+ CRM_CHECK(msg_data != NULL, return I_NULL);
+
+ /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
+ * relay the operation to the affected node, which will unregister the
+ * resource from the local executor, clear the resource's history from the
+ * CIB, and do some bookkeeping in the controller.
+ *
+ * However, if the affected node is offline, the client will specify
+ * mode="cib" which means the controller receiving the operation should
+ * clear the resource's history from the CIB and nothing else. This is used
+ * to clear shutdown locks.
+ */
+ mode = crm_element_value(msg_data, PCMK__XA_MODE);
+ if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+ // Relay to affected node
+ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+ return I_ROUTER;
+
+ } else {
+ // Delete CIB history locally (compare with do_lrm_delete())
+ const char *from_sys = NULL;
+ const char *user_name = NULL;
+ const char *rsc_id = NULL;
+ const char *node = NULL;
+ xmlNode *rsc_xml = NULL;
+ int rc = pcmk_rc_ok;
+
+ rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
+ CRM_CHECK(rsc_xml != NULL, return I_NULL);
+
+ rsc_id = ID(rsc_xml);
+ from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
+ node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
+ user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
+ crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
+ "(clearing CIB resource history only)", rsc_id, node,
+ (user_name? " for user " : ""), (user_name? user_name : ""));
+ rc = controld_delete_resource_history(rsc_id, node, user_name,
+ cib_dryrun|cib_sync_call);
+ if (rc == pcmk_rc_ok) {
+ rc = controld_delete_resource_history(rsc_id, node, user_name,
+ crmd_cib_smart_opt());
+ }
+
+        /* Notify client and tengine (the tengine is notified only when
+         * mode="cib" for a CRM_OP_LRM_DELETE)
+         */
+ if (from_sys) {
+ lrmd_event_data_t *op = NULL;
+ const char *from_host = crm_element_value(stored_msg,
+ F_CRM_HOST_FROM);
+ const char *transition;
+
+ if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
+ transition = crm_element_value(msg_data,
+ XML_ATTR_TRANSITION_KEY);
+ } else {
+ transition = crm_element_value(stored_msg,
+ XML_ATTR_TRANSITION_KEY);
+ }
+
+ crm_info("Notifying %s on %s that %s was%s deleted",
+ from_sys, (from_host? from_host : "local node"), rsc_id,
+ ((rc == pcmk_rc_ok)? "" : " not"));
+ op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
+ op->type = lrmd_event_exec_complete;
+ op->user_data = strdup(transition? transition : FAKE_TE_ID);
+ op->params = pcmk__strkey_table(free, free);
+ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
+ strdup(CRM_FEATURE_SET));
+ controld_rc2event(op, rc);
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+ lrmd_free_event(op);
+ controld_trigger_delete_refresh(from_sys, rsc_id);
+ }
+ return I_NULL;
+ }
+}
+
+/*!
+ * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_remote_state(const xmlNode *msg)
+{
+ const char *conn_host = NULL;
+ const char *remote_uname = ID(msg);
+ crm_node_t *remote_peer;
+ bool remote_is_up = false;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up);
+
+ CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
+
+ remote_peer = crm_remote_peer_get(remote_uname);
+ CRM_CHECK(remote_peer, return I_NULL);
+
+ pcmk__update_peer_state(__func__, remote_peer,
+ remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
+ 0);
+
+ conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
+ if (conn_host) {
+ pcmk__str_update(&remote_peer->conn_host, conn_host);
+ } else if (remote_peer->conn_host) {
+ free(remote_peer->conn_host);
+ remote_peer->conn_host = NULL;
+ }
+
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_PING message
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_ping(const xmlNode *msg)
+{
+ const char *value = NULL;
+ xmlNode *ping = NULL;
+ xmlNode *reply = NULL;
+
+ // Build reply
+
+ ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+
+ // Add controller state
+ value = fsa_state2string(controld_globals.fsa_state);
+ crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value);
+ crm_notice("Current ping state: %s", value); // CTS needs this
+
+ // Add controller health
+ // @TODO maybe do some checks to determine meaningful status
+ crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+
+ // Send reply
+ reply = create_reply(msg, ping);
+ free_xml(ping);
+ if (reply != NULL) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a PCMK__CONTROLD_CMD_NODES message
+ *
+ * \param[in] request Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_node_list(const xmlNode *request)
+{
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ xmlNode *reply = NULL;
+ xmlNode *reply_data = NULL;
+
+ // Create message data for reply
+ reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
+ xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
+
+ crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t
+ crm_xml_add(xml, XML_ATTR_UNAME, node->uname);
+ crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state);
+ }
+
+ // Create and send reply
+ reply = create_reply(request, reply_data);
+ free_xml(reply_data);
+ if (reply) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_NODE_INFO request
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_node_info_request(const xmlNode *msg)
+{
+ const char *value = NULL;
+ crm_node_t *node = NULL;
+ int node_id = 0;
+ xmlNode *reply = NULL;
+ xmlNode *reply_data = NULL;
+
+ // Build reply
+
+ reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE);
+ crm_xml_add(reply_data, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD);
+
+ // Add whether current partition has quorum
+ pcmk__xe_set_bool_attr(reply_data, XML_ATTR_HAVE_QUORUM,
+ pcmk_is_set(controld_globals.flags,
+ controld_has_quorum));
+
+ // Check whether client requested node info by ID and/or name
+ crm_element_value_int(msg, XML_ATTR_ID, &node_id);
+ if (node_id < 0) {
+ node_id = 0;
+ }
+ value = crm_element_value(msg, XML_ATTR_UNAME);
+
+ // Default to local node if none given
+ if ((node_id == 0) && (value == NULL)) {
+ value = controld_globals.our_nodename;
+ }
+
+ node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY);
+ if (node) {
+ crm_xml_add(reply_data, XML_ATTR_ID, node->uuid);
+ crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname);
+ crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state);
+ pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE,
+ pcmk_is_set(node->flags, crm_remote_node));
+ }
+
+ // Send reply
+ reply = create_reply(msg, reply_data);
+ free_xml(reply_data);
+ if (reply != NULL) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+static void
+verify_feature_set(xmlNode *msg)
+{
+ const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION);
+
+ if (dc_version == NULL) {
+ /* All we really know is that the DC feature set is older than 3.1.0,
+ * but that's also all that really matters.
+ */
+ dc_version = "3.0.14";
+ }
+
+ if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
+ crm_trace("Local feature set (%s) is compatible with DC's (%s)",
+ CRM_FEATURE_SET, dc_version);
+ } else {
+ crm_err("Local feature set (%s) is incompatible with DC's (%s)",
+ CRM_FEATURE_SET, dc_version);
+
+ // Nothing is likely to improve without administrator involvement
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ crmd_exit(CRM_EX_FATAL);
+ }
+}
+
+// DC gets own shutdown all-clear
+static enum crmd_fsa_input
+handle_shutdown_self_ack(xmlNode *stored_msg)
+{
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ // The expected case -- we initiated own shutdown sequence
+ crm_info("Shutting down controller");
+ return I_STOP;
+ }
+
+ if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
+ // Must be logic error -- DC confirming its own unrequested shutdown
+ crm_err("Shutting down controller immediately due to "
+ "unexpected shutdown confirmation");
+ return I_TERMINATE;
+ }
+
+ if (controld_globals.fsa_state != S_STOPPING) {
+ // Shouldn't happen -- non-DC confirming unrequested shutdown
+ crm_err("Starting new DC election because %s is "
+ "confirming shutdown we did not request",
+ (host_from? host_from : "another node"));
+ return I_ELECTION;
+ }
+
+ // Shouldn't happen, but we are already stopping anyway
+ crm_debug("Ignoring unexpected shutdown confirmation from %s",
+ (host_from? host_from : "another node"));
+ return I_NULL;
+}
+
+// Non-DC gets shutdown all-clear from DC
+static enum crmd_fsa_input
+handle_shutdown_ack(xmlNode *stored_msg)
+{
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (host_from == NULL) {
+ crm_warn("Ignoring shutdown request without origin specified");
+ return I_NULL;
+ }
+
+ if (pcmk__str_eq(host_from, controld_globals.dc_name,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("Shutting down controller after confirmation from %s",
+ host_from);
+ } else {
+ crm_err("Shutting down controller after unexpected "
+ "shutdown request from %s", host_from);
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ }
+ return I_STOP;
+ }
+
+ crm_warn("Ignoring shutdown request from %s because DC is %s",
+ host_from, controld_globals.dc_name);
+ return I_NULL;
+}
+
+static enum crmd_fsa_input
+handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
+{
+ xmlNode *msg = NULL;
+ const char *op = crm_element_value(stored_msg, F_CRM_TASK);
+
+ /* Optimize this for the DC - it has the most to do */
+
+ if (op == NULL) {
+ crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]");
+ return I_NULL;
+ }
+
+ if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
+ const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+ crm_node_t *node = pcmk__search_cluster_node_cache(0, from);
+
+ pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
+        if (AM_I_DC == FALSE) {
+ return I_NULL; /* Done */
+ }
+ }
+
+ /*========== DC-Only Actions ==========*/
+ if (AM_I_DC) {
+ if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
+ return I_NODE_JOIN;
+
+ } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
+ return I_JOIN_REQUEST;
+
+ } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
+ return I_JOIN_RESULT;
+
+ } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
+ return handle_shutdown_self_ack(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
+ // Another controller wants to shut down its node
+ return handle_shutdown_request(stored_msg);
+ }
+ }
+
+ /*========== common actions ==========*/
+ if (strcmp(op, CRM_OP_NOVOTE) == 0) {
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = stored_msg;
+ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
+ A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
+ __func__);
+
+ } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
+ /* a remote connection host is letting us know the node state */
+ return handle_remote_state(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
+ throttle_update(stored_msg);
+ if (AM_I_DC && (controld_globals.transition_graph != NULL)
+ && !controld_globals.transition_graph->complete) {
+
+ crm_debug("The throttle changed. Trigger a graph.");
+ trigger_graph();
+ }
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
+ return handle_failcount_op(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_VOTE) == 0) {
+ /* count the vote and decide what to do after that */
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = stored_msg;
+ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
+ A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
+ __func__);
+
+ /* Sometimes we _must_ go into S_ELECTION */
+ if (controld_globals.fsa_state == S_HALT) {
+ crm_debug("Forcing an election from S_HALT");
+ return I_ELECTION;
+#if 0
+ } else if (AM_I_DC) {
+ /* This is the old way of doing things but what is gained? */
+ return I_ELECTION;
+#endif
+ }
+
+ } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
+ verify_feature_set(stored_msg);
+ crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
+ return I_JOIN_OFFER;
+
+ } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
+ crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
+ return I_JOIN_RESULT;
+
+ } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
+ return handle_lrm_delete(stored_msg);
+
+ } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
+ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
+ || (strcmp(op, CRM_OP_REPROBE) == 0)) {
+
+ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+ return I_ROUTER;
+
+ } else if (strcmp(op, CRM_OP_NOOP) == 0) {
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
+
+ crm_shutdown(SIGTERM);
+ /*return I_SHUTDOWN; */
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_PING) == 0) {
+ return handle_ping(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
+ return handle_node_info_request(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
+ int id = 0;
+ const char *name = NULL;
+
+ crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
+ name = crm_element_value(stored_msg, XML_ATTR_UNAME);
+
+        if (cause == C_IPC_MESSAGE) {
+ msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
+ crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
+ } else {
+ crm_notice("Instructing peers to remove references to node %s/%u", name, id);
+ }
+ free_xml(msg);
+
+ } else {
+ reap_crm_member(id, name);
+
+ /* If we're forgetting this node, also forget any failures to fence
+ * it, so we don't carry that over to any node added later with the
+ * same name.
+ */
+ st_fail_count_reset(name);
+ }
+
+ } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
+ xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
+
+ remote_ra_process_maintenance_nodes(xml);
+
+ } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
+ return handle_node_list(stored_msg);
+
+ /*========== (NOT_DC)-Only Actions ==========*/
+ } else if (!AM_I_DC) {
+
+ if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
+ return handle_shutdown_ack(stored_msg);
+ }
+
+ } else {
+ crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
+ crm_log_xml_err(stored_msg, "Unexpected");
+ }
+
+ return I_NULL;
+}
+
+static void
+handle_response(xmlNode *stored_msg)
+{
+ const char *op = crm_element_value(stored_msg, F_CRM_TASK);
+
+ if (op == NULL) {
+ crm_log_xml_err(stored_msg, "Bad message");
+
+ } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
+        // Check whether the scheduler's answer has been superseded by a newer request
+ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
+
+ if (msg_ref == NULL) {
+ crm_err("%s - Ignoring calculation with no reference", op);
+
+ } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
+ pcmk__str_none)) {
+ ha_msg_input_t fsa_input;
+
+ controld_stop_sched_timer();
+ fsa_input.msg = stored_msg;
+ register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
+
+ } else {
+ crm_info("%s calculation %s is obsolete", op, msg_ref);
+ }
+
+ } else if (strcmp(op, CRM_OP_VOTE) == 0
+ || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
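+        // Acknowledgments that require no further processing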
+
+ } else {
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
+ op, host_from, AM_I_DC ? "DC" : "controller");
+ }
+}
+
+static enum crmd_fsa_input
+handle_shutdown_request(xmlNode * stored_msg)
+{
+ /* handle here to avoid potential version issues
+ * where the shutdown message/procedure may have
+ * been changed in later versions.
+ *
+ * This way the DC is always in control of the shutdown
+ */
+
+ char *now_s = NULL;
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (host_from == NULL) {
+        /* We are both shutting down and the DC */
+ host_from = controld_globals.our_nodename;
+ }
+
+ crm_info("Creating shutdown request for %s (state=%s)", host_from,
+ fsa_state2string(controld_globals.fsa_state));
+ crm_log_xml_trace(stored_msg, "message");
+
+ now_s = pcmk__ttoa(time(NULL));
+ update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
+ free(now_s);
+
+    /* will be picked up by the TE as long as it's running */
+ return I_NULL;
+}
+
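+/*!
+ * \internal
+ * \brief Deliver a message to a local subsystem
+ *
+ * Deliver the message via IPC if the subsystem is a connected client,
+ * otherwise hand it directly to the appropriate handler (transitioner,
+ * executor interface, or remote proxy session).
+ *
+ * \param[in] msg Message to deliver
+ * \param[in] sys Name of the subsystem the message is addressed to
+ */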
+static void
+send_msg_via_ipc(xmlNode * msg, const char *sys)
+{
+ pcmk__client_t *client_channel = NULL;
+
+ CRM_CHECK(sys != NULL, return);
+
+ client_channel = pcmk__find_client_by_id(sys);
+
+ if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
+ crm_xml_add(msg, F_CRM_HOST_FROM, controld_globals.our_nodename);
+ }
+
+ if (client_channel != NULL) {
+ /* Transient clients such as crmadmin */
+ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
+
+ } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ xmlNode *data = get_message_xml(msg, F_CRM_DATA);
+
+ process_te_message(msg, data);
+
+ } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+ fsa_data_t fsa_data;
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = msg;
+ fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
+
+ fsa_data.id = 0;
+ fsa_data.actions = 0;
+ fsa_data.data = &fsa_input;
+ fsa_data.fsa_input = I_MESSAGE;
+ fsa_data.fsa_cause = C_IPC_MESSAGE;
+ fsa_data.origin = __func__;
+ fsa_data.data_type = fsa_dt_ha_msg;
+
+ do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
+ I_MESSAGE, &fsa_data);
+
+ } else if (crmd_is_proxy_session(sys)) {
+ crmd_proxy_send(sys, msg);
+
+ } else {
+ crm_info("Received invalid request: unknown subsystem '%s'", sys);
+ }
+}
+
+void
+delete_ha_msg_input(ha_msg_input_t * orig)
+{
+ if (orig == NULL) {
+ return;
+ }
+ free_xml(orig->msg);
+ free(orig);
+}
+
+/*!
+ * \internal
+ * \brief Notify the cluster of a remote node state change
+ *
+ * \param[in] node_name Node's name
+ * \param[in] node_up true if node is up, false if down
+ */
+void
+broadcast_remote_state_message(const char *node_name, bool node_up)
+{
+ xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ crm_info("Notifying cluster of Pacemaker Remote node %s %s",
+ node_name, node_up? "coming up" : "going down");
+
+ crm_xml_add(msg, XML_ATTR_ID, node_name);
+ pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up);
+
+ if (node_up) {
+ crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename);
+ }
+
+ send_cluster_message(NULL, crm_msg_crmd, msg, TRUE);
+ free_xml(msg);
+}
+
diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h
new file mode 100644
index 0000000..4108961
--- /dev/null
+++ b/daemons/controld/controld_messages.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef XML_CRM_MESSAGES__H
+# define XML_CRM_MESSAGES__H
+
+# include <crm/crm.h>
+# include <crm/common/ipc_internal.h>
+# include <crm/common/xml.h>
+# include <crm/cluster/internal.h>
+# include <controld_fsa.h>
+
+typedef struct ha_msg_input_s {
+ xmlNode *msg;
+ xmlNode *xml;
+
+} ha_msg_input_t;
+
+extern void delete_ha_msg_input(ha_msg_input_t * orig);
+
+extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type,
+ const char *caller);
+
+# define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __func__)
+
+extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ fsa_data_t * cur_data, void *new_data, const char *raised_from);
+
+#define register_fsa_error(cause, input, new_data) \
+ register_fsa_error_adv(cause, input, msg_data, new_data, __func__)
+
+void register_fsa_input_adv(enum crmd_fsa_cause cause,
+ enum crmd_fsa_input input, void *data,
+ uint64_t with_actions, gboolean prepend,
+ const char *raised_from);
+
+extern void fsa_dump_queue(int log_level);
+extern void route_message(enum crmd_fsa_cause cause, xmlNode * input);
+
+# define crmd_fsa_stall(suppress) do { \
+ if(suppress == FALSE && msg_data != NULL) { \
+ register_fsa_input_adv( \
+ ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT, \
+ ((fsa_data_t*)msg_data)->data, action, TRUE, __func__); \
+ } else { \
+ register_fsa_input_adv( \
+ C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \
+ NULL, action, TRUE, __func__); \
+ } \
+ } while(0)
+
+#define register_fsa_input(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__)
+
+#define register_fsa_input_before(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __func__)
+
+#define register_fsa_input_later(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__)
+
+void delete_fsa_input(fsa_data_t * fsa_data);
+
+fsa_data_t *get_message(void);
+
+extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally);
+
+gboolean crmd_is_proxy_session(const char *session);
+void crmd_proxy_send(const char *session, xmlNode *msg);
+
+bool controld_authorize_ipc_message(const xmlNode *client_msg,
+ pcmk__client_t *curr_client,
+ const char *proxy_session);
+
+extern gboolean send_request(xmlNode * msg, char **msg_reference);
+
+extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
+
+void broadcast_remote_state_message(const char *node_name, bool node_up);
+
+#endif
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
new file mode 100644
index 0000000..240a978
--- /dev/null
+++ b/daemons/controld/controld_metadata.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2017-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <glib.h>
+#include <regex.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h>
+
+#include <pacemaker-controld.h>
+
+static void
+ra_param_free(void *param)
+{
+ if (param) {
+ struct ra_param_s *p = (struct ra_param_s *) param;
+
+ if (p->rap_name) {
+ free(p->rap_name);
+ }
+ free(param);
+ }
+}
+
+static void
+metadata_free(void *metadata)
+{
+ if (metadata) {
+ struct ra_metadata_s *md = (struct ra_metadata_s *) metadata;
+
+ g_list_free_full(md->ra_params, ra_param_free);
+ free(metadata);
+ }
+}
+
+GHashTable *
+metadata_cache_new(void)
+{
+ return pcmk__strkey_table(free, metadata_free);
+}
+
+void
+metadata_cache_free(GHashTable *mdc)
+{
+ if (mdc) {
+ crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc));
+ g_hash_table_destroy(mdc);
+ }
+}
+
+void
+metadata_cache_reset(GHashTable *mdc)
+{
+ if (mdc) {
+ crm_trace("Resetting metadata cache with %d members",
+ g_hash_table_size(mdc));
+ g_hash_table_remove_all(mdc);
+ }
+}
+
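+/*!
+ * \internal
+ * \brief Create a new parameter entry from resource agent meta-data XML
+ *
+ * \param[in] param_xml XML for a single parameter
+ *
+ * \return Newly allocated parameter entry, or NULL on allocation failure
+ */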
+static struct ra_param_s *
+ra_param_from_xml(xmlNode *param_xml)
+{
+ const char *param_name = crm_element_value(param_xml, "name");
+ struct ra_param_s *p;
+
+ p = calloc(1, sizeof(struct ra_param_s));
+ if (p == NULL) {
+ return NULL;
+ }
+
+ p->rap_name = strdup(param_name);
+ if (p->rap_name == NULL) {
+ free(p);
+ return NULL;
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "reloadable")) {
+ controld_set_ra_param_flags(p, ra_param_reloadable);
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "unique")) {
+ controld_set_ra_param_flags(p, ra_param_unique);
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "private")) {
+ controld_set_ra_param_flags(p, ra_param_private);
+ }
+ return p;
+}
+
+static void
+log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version)
+{
+ if (pcmk__str_empty(ra_ocf_version)) {
+ crm_warn("%s does not advertise OCF version supported", ra_key);
+
+ } else if (compare_version(ra_ocf_version, "2") >= 0) {
+ crm_warn("%s supports OCF version %s (this Pacemaker version supports "
+ PCMK_OCF_VERSION " and might not work properly with agent)",
+ ra_key, ra_ocf_version);
+
+ } else if (compare_version(ra_ocf_version, PCMK_OCF_VERSION) > 0) {
+ crm_info("%s supports OCF version %s (this Pacemaker version supports "
+ PCMK_OCF_VERSION " and might not use all agent features)",
+ ra_key, ra_ocf_version);
+
+ } else {
+ crm_debug("%s supports OCF version %s", ra_key, ra_ocf_version);
+ }
+}
+
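+/*!
+ * \internal
+ * \brief Parse a resource agent's meta-data and add it to the cache
+ *
+ * \param[in,out] mdc           Metadata cache to update
+ * \param[in]     rsc           Resource agent information
+ * \param[in]     metadata_str  Meta-data XML as a string
+ *
+ * \return Newly allocated cache entry on success, otherwise NULL
+ */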
+struct ra_metadata_s *
+controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc,
+ const char *metadata_str)
+{
+ char *key = NULL;
+ const char *reason = NULL;
+ xmlNode *metadata = NULL;
+ xmlNode *match = NULL;
+ struct ra_metadata_s *md = NULL;
+ bool any_private_params = false;
+ bool ocf1_1 = false;
+
+ CRM_CHECK(mdc && rsc && metadata_str, return NULL);
+
+ key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
+ if (!key) {
+ reason = "Invalid resource agent standard or type";
+ goto err;
+ }
+
+ metadata = string2xml(metadata_str);
+ if (!metadata) {
+ reason = "Metadata is not valid XML";
+ goto err;
+ }
+
+ md = calloc(1, sizeof(struct ra_metadata_s));
+ if (md == NULL) {
+ reason = "Could not allocate memory";
+ goto err;
+ }
+
+ if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
+ xmlChar *content = NULL;
+ xmlNode *version_element = first_named_child(metadata, "version");
+
+ if (version_element != NULL) {
+ content = xmlNodeGetContent(version_element);
+ }
+ log_ra_ocf_version(key, (const char *) content);
+ if (content != NULL) {
+ ocf1_1 = (compare_version((const char *) content, "1.1") >= 0);
+ xmlFree(content);
+ }
+ }
+
+ // Check supported actions
+ match = first_named_child(metadata, "actions");
+ for (match = first_named_child(match, "action"); match != NULL;
+ match = crm_next_same_xml(match)) {
+
+ const char *action_name = crm_element_value(match, "name");
+
+ if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT,
+ pcmk__str_none)) {
+ if (ocf1_1) {
+ controld_set_ra_flags(md, key, ra_supports_reload_agent);
+ } else {
+ crm_notice("reload-agent action will not be used with %s "
+ "because it does not support OCF 1.1 or later", key);
+ }
+
+ } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD,
+ pcmk__str_casei)) {
+ controld_set_ra_flags(md, key, ra_supports_legacy_reload);
+ }
+ }
+
+ // Build a parameter list
+ match = first_named_child(metadata, "parameters");
+ for (match = first_named_child(match, "parameter"); match != NULL;
+ match = crm_next_same_xml(match)) {
+
+ const char *param_name = crm_element_value(match, "name");
+
+ if (param_name == NULL) {
+ crm_warn("Metadata for %s:%s:%s has parameter without a name",
+ rsc->standard, rsc->provider, rsc->type);
+ } else {
+ struct ra_param_s *p = ra_param_from_xml(match);
+
+ if (p == NULL) {
+ reason = "Could not allocate memory";
+ goto err;
+ }
+ if (pcmk_is_set(p->rap_flags, ra_param_private)) {
+ any_private_params = true;
+ }
+ md->ra_params = g_list_prepend(md->ra_params, p);
+ }
+ }
+
+ /* Newer resource agents support the "private" parameter attribute to
+ * indicate sensitive parameters. For backward compatibility with older
+ * agents, implicitly treat a few common names as private when the agent
+ * doesn't specify any explicitly.
+ */
+ if (!any_private_params) {
+ for (GList *iter = md->ra_params; iter != NULL; iter = iter->next) {
+ struct ra_param_s *p = iter->data;
+
+ if (pcmk__str_any_of(p->rap_name, "password", "passwd", "user",
+ NULL)) {
+ controld_set_ra_param_flags(p, ra_param_private);
+ }
+ }
+ }
+
+ g_hash_table_replace(mdc, key, md);
+ free_xml(metadata);
+ return md;
+
+err:
+ crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s",
+ rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
+ pcmk__s(rsc->provider, ""), rsc->type, reason);
+ free(key);
+ free_xml(metadata);
+ metadata_free(md);
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Get meta-data for a resource
+ *
+ * \param[in,out] lrm_state Use meta-data cache from this executor connection
+ * \param[in] rsc Resource to get meta-data for
+ * \param[in] source Allowed meta-data sources (bitmask of
+ * enum controld_metadata_source_e values)
+ *
+ * \return Meta-data cache entry for given resource, or NULL if not available
+ */
+struct ra_metadata_s *
+controld_get_rsc_metadata(lrm_state_t *lrm_state, const lrmd_rsc_info_t *rsc,
+ uint32_t source)
+{
+ struct ra_metadata_s *metadata = NULL;
+ char *metadata_str = NULL;
+ char *key = NULL;
+ int rc = pcmk_ok;
+
+ CRM_CHECK((lrm_state != NULL) && (rsc != NULL), return NULL);
+
+ if (pcmk_is_set(source, controld_metadata_from_cache)) {
+ key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
+ if (key != NULL) {
+ metadata = g_hash_table_lookup(lrm_state->metadata_cache, key);
+ free(key);
+ }
+ if (metadata != NULL) {
+ crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ return metadata;
+ }
+ }
+
+ if (!pcmk_is_set(source, controld_metadata_from_agent)) {
+ return NULL;
+ }
+
+ /* For most actions, metadata was cached asynchronously before action
+ * execution (via metadata_complete()).
+ *
+ * However if that failed, and for other actions, retrieve the metadata now
+ * via a local, synchronous, direct execution of the agent.
+ *
+ * This has multiple issues, which is why this is just a fallback: the
+ * executor should execute agents, not the controller; metadata for
+ * Pacemaker Remote nodes should be collected on those nodes, not locally;
+ * the metadata call shouldn't eat into the timeout of the real action being
+ * performed; and the synchronous call blocks the controller (which also
+ * means that if the metadata action tries to contact the controller,
+ * everything will hang until the timeout).
+ */
+ crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
+ rsc->type, &metadata_str, 0);
+ if (rc != pcmk_ok) {
+ crm_warn("Failed to get metadata for %s (%s%s%s:%s): %s",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type, pcmk_strerror(rc));
+ return NULL;
+ }
+
+ metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc,
+ metadata_str);
+ free(metadata_str);
+ return metadata;
+}
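+
+/* A hedged usage sketch (illustrative only, not part of this patch): a caller
+ * that prefers the cache but accepts falling back to a synchronous agent
+ * execution would pass both source flags:
+ *
+ *     struct ra_metadata_s *md =
+ *         controld_get_rsc_metadata(lrm_state, rsc,
+ *                                   controld_metadata_from_cache
+ *                                   |controld_metadata_from_agent);
+ *
+ * Passing only controld_metadata_from_cache keeps the call non-blocking, at
+ * the cost of possibly returning NULL.
+ */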
diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h
new file mode 100644
index 0000000..12ea327
--- /dev/null
+++ b/daemons/controld/controld_metadata.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2017-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_METADATA_H
+#define CRMD_METADATA_H
+
+#include <stdint.h> // uint32_t
+#include <glib.h> // GList, GHashTable
+#include "controld_lrm.h" // lrm_state_t, lrm_rsc_info_t
+
+/*
+ * @COMPAT pre-OCF-1.1 resource agents
+ *
+ * Pacemaker previously used the "reload" action to reload agent parameters,
+ * but most agents used it to reload the service configuration. Pacemaker also
+ * misused the OCF 1.0 "unique" parameter attribute to indicate reloadability.
+ *
+ * OCF 1.1 created the "reload-agent" action and "reloadable" parameter
+ * attribute for the Pacemaker usage.
+ *
+ * Pacemaker now supports the OCF 1.1 usage. The old usage is now deprecated,
+ * but will be supported if the agent does not claim OCF 1.1 or later
+ * compliance and does not advertise the reload-agent action.
+ */
+enum ra_flags_e {
+ ra_supports_legacy_reload = (1 << 0),
+ ra_supports_reload_agent = (1 << 1),
+};
+
+enum ra_param_flags_e {
+ ra_param_unique = (1 << 0),
+ ra_param_private = (1 << 1),
+ ra_param_reloadable = (1 << 2),
+};
+
+// Allowed sources of resource agent meta-data when requesting it
+enum controld_metadata_source_e {
+ controld_metadata_from_cache = (1 << 0),
+ controld_metadata_from_agent = (1 << 1),
+};
+
+struct ra_param_s {
+ char *rap_name;
+ uint32_t rap_flags; // bitmask of ra_param_flags_e
+};
+
+struct ra_metadata_s {
+ GList *ra_params; // list of struct ra_param_s *
+ uint32_t ra_flags; // bitmask of ra_flags_e
+};
+
+#define controld_set_ra_flags(ra_md, ra_key, flags_to_set) do { \
+ (ra_md)->ra_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent", ra_key, \
+ (ra_md)->ra_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_set_ra_param_flags(ra_param, flags_to_set) do { \
+ (ra_param)->rap_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent parameter", (ra_param)->rap_name, \
+ (ra_param)->rap_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
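+
+/* Illustrative example (an assumption, not code from this patch): after
+ * parsing agent meta-data, a caller might mark a parameter as reloadable:
+ *
+ *     struct ra_param_s *p = ra_param_from_xml(match);
+ *
+ *     if (p != NULL) {
+ *         controld_set_ra_param_flags(p, ra_param_reloadable);
+ *     }
+ *
+ * The stringified flag name (#flags_to_set) is what appears in trace logs.
+ */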
+
+GHashTable *metadata_cache_new(void);
+void metadata_cache_free(GHashTable *mdc);
+void metadata_cache_reset(GHashTable *mdc);
+
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
+ const lrmd_rsc_info_t *rsc,
+ const char *metadata_str);
+struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
+ const lrmd_rsc_info_t *rsc,
+ uint32_t source);
+
+static inline const char *
+ra_param_flag2text(enum ra_param_flags_e flag)
+{
+ switch (flag) {
+ case ra_param_reloadable:
+ return "reloadable";
+ case ra_param_unique:
+ return "unique";
+ case ra_param_private:
+ return "private";
+ default:
+ return "unknown";
+ }
+}
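+
+/* A minimal sketch of how the helper above might be used when logging
+ * (illustrative only; "p" is an assumed struct ra_param_s pointer):
+ *
+ *     if (pcmk_is_set(p->rap_flags, ra_param_private)) {
+ *         crm_trace("Parameter %s is %s", p->rap_name,
+ *                   ra_param_flag2text(ra_param_private));
+ *     }
+ */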
+
+#endif
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
new file mode 100644
index 0000000..f24b755
--- /dev/null
+++ b/daemons/controld/controld_remote_ra.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/lrmd.h>
+#include <crm/lrmd_internal.h>
+#include <crm/services.h>
+
+#include <pacemaker-controld.h>
+
+#define REMOTE_LRMD_RA "remote"
+
+/* Maximum start timeout (ms) per attempt before the command is retried */
+#define MAX_START_TIMEOUT_MS 10000
+
+#define cmd_set_flags(cmd, flags_to_set) do { \
+ (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Remote command", (cmd)->rsc_id, (cmd)->status, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define cmd_clear_flags(cmd, flags_to_clear) do { \
+ (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Remote command", (cmd)->rsc_id, (cmd)->status, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
+enum remote_cmd_status {
+ cmd_reported_success = (1 << 0),
+ cmd_cancel = (1 << 1),
+};
+
+typedef struct remote_ra_cmd_s {
+ /*! the local node the cmd is issued from */
+ char *owner;
+ /*! the remote node the cmd is executed on */
+ char *rsc_id;
+ /*! the action to execute */
+ char *action;
+ /*! some string the client wants us to give it back */
+ char *userdata;
+ /*! start delay in ms */
+ int start_delay;
+ /*! timer id used for start delay. */
+ int delay_id;
+ /*! timeout in ms for cmd */
+ int timeout;
+ int remaining_timeout;
+ /*! recurring interval in ms */
+ guint interval_ms;
+ /*! interval timer id */
+ int interval_id;
+ int monitor_timeout_id;
+ int takeover_timeout_id;
+ /*! action parameters */
+ lrmd_key_value_t *params;
+ pcmk__action_result_t result;
+ int call_id;
+ time_t start_time;
+ uint32_t status;
+} remote_ra_cmd_t;
+
+#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
+ lrm_state_t *lrm = (lrm_state); \
+ remote_ra_data_t *ra = lrm->remote_ra_data; \
+ ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
+ lrm->node_name, ra->status, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
+ lrm_state_t *lrm = (lrm_state); \
+ remote_ra_data_t *ra = lrm->remote_ra_data; \
+ ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
+ lrm->node_name, ra->status, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
+enum remote_status {
+ expect_takeover = (1 << 0),
+ takeover_complete = (1 << 1),
+ remote_active = (1 << 2),
+ /* Maintenance mode is difficult to determine from the controller's
+ * context, so the scheduler signals it back to us with the transition.
+ */
+ remote_in_maint = (1 << 3),
+ /* Similarly for whether we are controlling a guest node or a remote node:
+ * fortunately the transition already carries a meta-attribute, and since
+ * the distinction never changes for a given connection, we can record it
+ * at resource start for later use when the attributes aren't at hand.
+ */
+ controlling_guest = (1 << 4),
+};
+
+typedef struct remote_ra_data_s {
+ crm_trigger_t *work;
+ remote_ra_cmd_t *cur_cmd;
+ GList *cmds;
+ GList *recurring_cmds;
+ uint32_t status;
+} remote_ra_data_t;
+
+static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
+static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
+static GList *fail_all_monitor_cmds(GList * list);
+
+static void
+free_cmd(gpointer user_data)
+{
+ remote_ra_cmd_t *cmd = user_data;
+
+ if (!cmd) {
+ return;
+ }
+ if (cmd->delay_id) {
+ g_source_remove(cmd->delay_id);
+ }
+ if (cmd->interval_id) {
+ g_source_remove(cmd->interval_id);
+ }
+ if (cmd->monitor_timeout_id) {
+ g_source_remove(cmd->monitor_timeout_id);
+ }
+ if (cmd->takeover_timeout_id) {
+ g_source_remove(cmd->takeover_timeout_id);
+ }
+ free(cmd->owner);
+ free(cmd->rsc_id);
+ free(cmd->action);
+ free(cmd->userdata);
+ pcmk__reset_result(&(cmd->result));
+ lrmd_key_value_freeall(cmd->params);
+ free(cmd);
+}
+
+static int
+generate_callid(void)
+{
+ static int remote_ra_callid = 0;
+
+ remote_ra_callid++;
+ if (remote_ra_callid <= 0) {
+ remote_ra_callid = 1;
+ }
+
+ return remote_ra_callid;
+}
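+
+/* Note: remote_ra_callid is a signed int, so after many calls the increment
+ * can wrap; the <= 0 check above resets the counter to 1, keeping call IDs
+ * strictly positive so callers can treat 0 as "no call ID".
+ */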
+
+static gboolean
+recurring_helper(gpointer data)
+{
+ remote_ra_cmd_t *cmd = data;
+ lrm_state_t *connection_rsc = NULL;
+
+ cmd->interval_id = 0;
+ connection_rsc = lrm_state_find(cmd->rsc_id);
+ if (connection_rsc && connection_rsc->remote_ra_data) {
+ remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
+
+ ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
+
+ ra_data->cmds = g_list_append(ra_data->cmds, cmd);
+ mainloop_set_trigger(ra_data->work);
+ }
+ return FALSE;
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+ remote_ra_cmd_t *cmd = data;
+ lrm_state_t *connection_rsc = NULL;
+
+ cmd->delay_id = 0;
+ connection_rsc = lrm_state_find(cmd->rsc_id);
+ if (connection_rsc && connection_rsc->remote_ra_data) {
+ remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
+
+ mainloop_set_trigger(ra_data->work);
+ }
+ return FALSE;
+}
+
+static bool
+should_purge_attributes(crm_node_t *node)
+{
+ bool purge = true;
+ crm_node_t *conn_node = NULL;
+ lrm_state_t *connection_rsc = NULL;
+
+ if (!node->conn_host) {
+ return purge;
+ }
+
+ /* Get the node that was hosting the remote connection resource from the
+ * peer cache. That's the one we really care about here.
+ */
+ conn_node = crm_get_peer(0, node->conn_host);
+ if (conn_node == NULL) {
+ return purge;
+ }
+
+ /* Check the uptime of connection_rsc. If it hasn't been running long
+ * enough, leave purge=true. "Long enough" means it started running earlier
+ * than the timestamp when we noticed it went away in the first place.
+ */
+ connection_rsc = lrm_state_find(node->uname);
+
+ if (connection_rsc != NULL) {
+ lrmd_t *lrm = connection_rsc->conn;
+ time_t uptime = lrmd__uptime(lrm);
+ time_t now = time(NULL);
+
+ /* Add 20s of fuzziness to give corosync a while to notice the remote
+ * host is gone. On various error conditions (failure to get uptime,
+ * peer_lost isn't set) we default to purging.
+ */
+ if (uptime > 0 &&
+ conn_node->peer_lost > 0 &&
+ uptime + 20 >= now - conn_node->peer_lost) {
+ purge = false;
+ }
+ }
+
+ return purge;
+}
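+
+/* Worked example (illustrative): if the connection has been up for 30s
+ * (uptime=30) and the hosting peer was lost 45s ago (now - peer_lost = 45),
+ * then 30 + 20 >= 45, so the connection predates the loss and the attributes
+ * are kept (purge=false). With uptime=10s, 10 + 20 < 45, so the connection
+ * came up after the loss and the attributes are purged.
+ */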
+
+static enum controld_section_e
+section_to_delete(bool purge)
+{
+ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ if (purge) {
+ return controld_section_all_unlocked;
+ } else {
+ return controld_section_lrm_unlocked;
+ }
+ } else {
+ if (purge) {
+ return controld_section_all;
+ } else {
+ return controld_section_lrm;
+ }
+ }
+}
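+
+/* Decision table for the above (shutdown-lock setting vs. purge):
+ *
+ *     locks enabled,  purge    -> controld_section_all_unlocked
+ *     locks enabled,  no purge -> controld_section_lrm_unlocked
+ *     locks disabled, purge    -> controld_section_all
+ *     locks disabled, no purge -> controld_section_lrm
+ */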
+
+static void
+purge_remote_node_attrs(int call_opt, crm_node_t *node)
+{
+ bool purge = should_purge_attributes(node);
+ enum controld_section_e section = section_to_delete(purge);
+
+ /* Purge node from attrd's memory */
+ if (purge) {
+ update_attrd_remote_node_removed(node->uname, NULL);
+ }
+
+ controld_delete_node_state(node->uname, section, call_opt);
+}
+
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node joining
+ *
+ * \param[in] node_name Name of newly integrated pacemaker_remote node
+ */
+static void
+remote_node_up(const char *node_name)
+{
+ int call_opt;
+ xmlNode *update, *state;
+ crm_node_t *node;
+
+ CRM_CHECK(node_name != NULL, return);
+ crm_info("Announcing Pacemaker Remote node %s", node_name);
+
+ call_opt = crmd_cib_smart_opt();
+
+ /* Delete node's probe_complete attribute. This serves two purposes:
+ *
+ * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
+ * - deleting it (or any attribute for that matter) here ensures the
+ * attribute manager learns the node is remote
+ */
+ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
+
+ /* Ensure node is in the remote peer cache with member status */
+ node = crm_remote_peer_get(node_name);
+ CRM_CHECK(node != NULL, return);
+
+ purge_remote_node_attrs(call_opt, node);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
+
+ /* pacemaker_remote nodes don't participate in the membership layer,
+ * so cluster nodes don't automatically get notified when they come and go.
+ * We send a cluster message to the DC, and update the CIB node state entry,
+ * so the DC will get it sooner (via message) or later (via CIB refresh),
+ * and any other interested parties can query the CIB.
+ */
+ broadcast_remote_state_message(node_name, true);
+
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ state = create_node_state_update(node, node_update_cluster, update,
+ __func__);
+
+ /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
+ * needs to be fenced, this flag will allow various actions to determine
+ * whether the fencing has happened yet.
+ */
+ crm_xml_add(state, XML_NODE_IS_FENCED, "0");
+
+ /* TODO: If the remote connection drops, and this (async) CIB update either
+ * failed or has not yet completed, later actions could mistakenly think the
+ * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
+ * previously set, because it won't have been cleared). This could prevent
+ * actual fencing or allow recurring monitor failures to be cleared too
+ * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
+ */
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
+ free_xml(update);
+}
+
+enum down_opts {
+ DOWN_KEEP_LRM,
+ DOWN_ERASE_LRM
+};
+
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node leaving
+ *
+ * \param[in] node_name Name of lost node
+ * \param[in] opts Whether to keep or erase LRM history
+ */
+static void
+remote_node_down(const char *node_name, const enum down_opts opts)
+{
+ xmlNode *update;
+ int call_opt = crmd_cib_smart_opt();
+ crm_node_t *node;
+
+ /* Purge node from attrd's memory */
+ update_attrd_remote_node_removed(node_name, NULL);
+
+ /* Normally, only node attributes should be erased, and the resource history
+ * should be kept until the node comes back up. However, after a successful
+ * fence, we want to clear the history as well, so we don't think resources
+ * are still running on the node.
+ */
+ if (opts == DOWN_ERASE_LRM) {
+ controld_delete_node_state(node_name, controld_section_all, call_opt);
+ } else {
+ controld_delete_node_state(node_name, controld_section_attrs, call_opt);
+ }
+
+ /* Ensure node is in the remote peer cache with lost state */
+ node = crm_remote_peer_get(node_name);
+ CRM_CHECK(node != NULL, return);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
+
+ /* Notify DC */
+ broadcast_remote_state_message(node_name, false);
+
+ /* Update CIB node state */
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ create_node_state_update(node, node_update_cluster, update, __func__);
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
+ free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Handle effects of a remote RA command on node state
+ *
+ * \param[in] cmd Completed remote RA command
+ */
+static void
+check_remote_node_state(const remote_ra_cmd_t *cmd)
+{
+ /* Only successful actions can change node state */
+ if (!pcmk__result_ok(&(cmd->result))) {
+ return;
+ }
+
+ if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ remote_node_up(cmd->rsc_id);
+
+ } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
+ /* After a successful migration, we don't need to do remote_node_up()
+ * because the DC already knows the node is up, and we don't want to
+ * clear LRM history etc. We do need to add the remote node to this
+ * host's remote peer cache, because (unless it happens to be DC)
+ * it hasn't been tracking the remote node, and other code relies on
+ * the cache to distinguish remote nodes from unseen cluster nodes.
+ */
+ crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
+
+ CRM_CHECK(node != NULL, return);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
+ remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
+
+ if (ra_data) {
+ if (!pcmk_is_set(ra_data->status, takeover_complete)) {
+ /* Stop means down if we didn't successfully migrate elsewhere */
+ remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
+ } else if (AM_I_DC == FALSE) {
+ /* Only the connection host and DC track node state,
+ * so if the connection migrated elsewhere and we aren't DC,
+ * un-cache the node, so we don't have stale info
+ */
+ crm_remote_peer_cache_remove(cmd->rsc_id);
+ }
+ }
+ }
+
+ /* We don't do anything for successful monitors, which is correct for
+ * routine recurring monitors, and for monitors on nodes where the
+ * connection isn't supposed to be (the cluster will stop the connection in
+ * that case). However, if the initial probe finds the connection already
+ * active on the node where we want it, we probably should do
+ * remote_node_up(). Unfortunately, we can't distinguish that case here.
+ * Given that connections have to be initiated by the cluster, the chance of
+ * that should be close to zero.
+ */
+}
+
+static void
+report_remote_ra_result(remote_ra_cmd_t * cmd)
+{
+ lrmd_event_data_t op = { 0, };
+
+ check_remote_node_state(cmd);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = cmd->rsc_id;
+ op.op_type = cmd->action;
+ op.user_data = cmd->userdata;
+ op.timeout = cmd->timeout;
+ op.interval_ms = cmd->interval_ms;
+ op.t_run = (unsigned int) cmd->start_time;
+ op.t_rcchange = (unsigned int) cmd->start_time;
+
+ lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
+ cmd->result.exit_reason);
+
+ if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
+ op.t_rcchange = (unsigned int) time(NULL);
+ /* This edge case will likely never occur, but if it does, the result is
+ * that a failure will not be processed correctly. It is only remotely
+ * possible because we can detect that a connection resource's TCP
+ * connection has failed at any moment after start has completed, while
+ * the actual recurring operation is just a connectivity ping.
+ *
+ * Basically, we are not guaranteed that the first successful monitor op
+ * and a subsequent failed monitor op will not occur at the same
+ * timestamp, so we have to make it look like the operations occurred at
+ * separate times. */
+ if (op.t_rcchange == op.t_run) {
+ op.t_rcchange++;
+ }
+ }
+
+ if (cmd->params) {
+ lrmd_key_value_t *tmp;
+
+ op.params = pcmk__strkey_table(free, free);
+ for (tmp = cmd->params; tmp; tmp = tmp->next) {
+ g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
+ }
+
+ }
+ op.call_id = cmd->call_id;
+ op.remote_nodename = cmd->owner;
+
+ lrm_op_callback(&op);
+
+ if (op.params) {
+ g_hash_table_destroy(op.params);
+ }
+ lrmd__reset_result(&op);
+}
+
+static void
+update_remaining_timeout(remote_ra_cmd_t * cmd)
+{
+ cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
+}
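+
+/* For example (illustrative): with timeout=20000 ms and a start_time 7
+ * seconds in the past, remaining_timeout becomes
+ * ((20000 / 1000) - 7) * 1000 = 13000 ms. The whole-second arithmetic
+ * discards sub-second precision.
+ */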
+
+static gboolean
+retry_start_cmd_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = data;
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ remote_ra_cmd_t *cmd = NULL;
+ int rc = ETIME;
+
+ if (!ra_data || !ra_data->cur_cmd) {
+ return FALSE;
+ }
+ cmd = ra_data->cur_cmd;
+ if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
+ return FALSE;
+ }
+ update_remaining_timeout(cmd);
+
+ if (cmd->remaining_timeout > 0) {
+ rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
+ } else {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Not enough time remains to retry remote connection");
+ }
+
+ if (rc != pcmk_rc_ok) {
+ report_remote_ra_result(cmd);
+
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ ra_data->cur_cmd = NULL;
+ free_cmd(cmd);
+ } else {
+ /* wait for connection event */
+ }
+
+ return FALSE;
+}
+
+
+static gboolean
+connection_takeover_timeout_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_cmd_t *cmd = data;
+
+ crm_info("takeover event timed out for node %s", cmd->rsc_id);
+ cmd->takeover_timeout_id = 0;
+
+ lrm_state = lrm_state_find(cmd->rsc_id);
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ free_cmd(cmd);
+
+ return FALSE;
+}
+
+static gboolean
+monitor_timeout_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_cmd_t *cmd = data;
+
+ lrm_state = lrm_state_find(cmd->rsc_id);
+
+ crm_info("Timed out waiting for remote poke response from %s%s",
+ cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
+ cmd->monitor_timeout_id = 0;
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
+ "Remote executor did not respond");
+
+ if (lrm_state && lrm_state->remote_ra_data) {
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ if (ra_data->cur_cmd == cmd) {
+ ra_data->cur_cmd = NULL;
+ }
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ }
+
+ report_remote_ra_result(cmd);
+ free_cmd(cmd);
+
+ if(lrm_state) {
+ lrm_state_disconnect(lrm_state);
+ }
+ return FALSE;
+}
+
+static void
+synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
+{
+ lrmd_event_data_t op = { 0, };
+
+ if (lrm_state == NULL) {
+ /* If no lrm_state is given, assume the local node */
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ }
+ CRM_ASSERT(lrm_state != NULL);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = rsc_id;
+ op.op_type = op_type;
+ op.t_run = (unsigned int) time(NULL);
+ op.t_rcchange = op.t_run;
+ op.call_id = generate_callid();
+ lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ process_lrm_event(lrm_state, &op, NULL, NULL);
+}
+
+void
+remote_lrm_op_callback(lrmd_event_data_t * op)
+{
+ gboolean cmd_handled = FALSE;
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_data_t *ra_data = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+
+ crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
+ "(%d) status=%s (%d)",
+ (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
+ lrmd_event_type2str(op->type), op->remote_nodename,
+ services_ocf_exitcode_str(op->rc), op->rc,
+ pcmk_exec_status_str(op->op_status), op->op_status);
+
+ lrm_state = lrm_state_find(op->remote_nodename);
+ if (!lrm_state || !lrm_state->remote_ra_data) {
+ crm_debug("No state information found for remote connection event");
+ return;
+ }
+ ra_data = lrm_state->remote_ra_data;
+
+ if (op->type == lrmd_event_new_client) {
+ // Another client has connected to the remote daemon
+
+ if (pcmk_is_set(ra_data->status, expect_takeover)) {
+ // Great, we knew this was coming
+ lrm_remote_clear_flags(lrm_state, expect_takeover);
+ lrm_remote_set_flags(lrm_state, takeover_complete);
+
+ } else {
+ crm_err("Disconnecting from Pacemaker Remote node %s due to "
+ "unexpected client takeover", op->remote_nodename);
+ /* In this case, lrmd_tls_connection_destroy() will be called under
+ * the control of mainloop. Do not free lrm_state->conn yet; it will
+ * be freed in the following stop action.
+ */
+ lrm_state_disconnect_only(lrm_state);
+ }
+ return;
+ }
+
+ /* filter all EXEC events up */
+ if (op->type == lrmd_event_exec_complete) {
+ if (pcmk_is_set(ra_data->status, takeover_complete)) {
+ crm_debug("ignoring event, this connection is taken over by another node");
+ } else {
+ lrm_op_callback(op);
+ }
+ return;
+ }
+
+ if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
+
+ if (!pcmk_is_set(ra_data->status, remote_active)) {
+ crm_debug("Disconnection from Pacemaker Remote node %s complete",
+ lrm_state->node_name);
+
+ } else if (!remote_ra_is_in_maintenance(lrm_state)) {
+ crm_err("Lost connection to Pacemaker Remote node %s",
+ lrm_state->node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+
+ } else {
+ crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+ lrm_state->node_name);
+ /* Do roughly what a 'stop' on the remote-resource would do */
+ handle_remote_ra_stop(lrm_state, NULL);
+ remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+ /* now fake the reply of a successful 'stop' */
+ synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
+ }
+ return;
+ }
+
+ if (!ra_data->cur_cmd) {
+ crm_debug("no event to match");
+ return;
+ }
+
+ cmd = ra_data->cur_cmd;
+
+ /* Start and migrate_from actions complete after the connection
+ * comes back to us. */
+ if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
+ "migrate_from", NULL)) {
+ if (op->connection_rc < 0) {
+ update_remaining_timeout(cmd);
+
+ if ((op->connection_rc == -ENOKEY)
+ || (op->connection_rc == -EKEYREJECTED)) {
+ // Hard error, don't retry
+ pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
+ PCMK_EXEC_ERROR,
+ pcmk_strerror(op->connection_rc));
+
+ } else if (cmd->remaining_timeout > 3000) {
+ crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
+ g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
+ return;
+
+ } else {
+ crm_trace("can't reschedule start, remaining timeout too small %d",
+ cmd->remaining_timeout);
+ pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "%s without enough time to retry",
+ pcmk_strerror(op->connection_rc));
+ }
+
+ } else {
+ lrm_state_reset_tables(lrm_state, TRUE);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ lrm_remote_set_flags(lrm_state, remote_active);
+ }
+
+ crm_debug("Remote connection event matched %s action", cmd->action);
+ report_remote_ra_result(cmd);
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+
+ if (cmd->monitor_timeout_id) {
+ g_source_remove(cmd->monitor_timeout_id);
+ cmd->monitor_timeout_id = 0;
+ }
+
+ /* Only report success the first time; after that, only worry about
+ * failures. For this function, if we get the poke back, it is always a
+ * success. Pokes only fail if the send fails or the response times out. */
+ if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ cmd_set_flags(cmd, cmd_reported_success);
+ }
+
+ crm_debug("Remote poke event matched %s action", cmd->action);
+
+ /* success, keep rescheduling if interval is present. */
+ if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
+ ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
+ cmd->interval_id = g_timeout_add(cmd->interval_ms,
+ recurring_helper, cmd);
+ cmd = NULL; /* prevent free */
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ if (pcmk_is_set(ra_data->status, remote_active) &&
+ !pcmk_is_set(cmd->status, cmd_cancel)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR,
+ "Remote connection unexpectedly dropped "
+ "during monitor");
+ report_remote_ra_result(cmd);
+ crm_err("Remote connection to %s unexpectedly dropped during monitor",
+ lrm_state->node_name);
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ cmd_handled = TRUE;
+
+ } else {
+ crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
+ }
+
+ if (cmd_handled) {
+ ra_data->cur_cmd = NULL;
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ free_cmd(cmd);
+ }
+}
+
+static void
+handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
+{
+ remote_ra_data_t *ra_data = NULL;
+
+ CRM_ASSERT(lrm_state);
+ ra_data = lrm_state->remote_ra_data;
+
+ if (!pcmk_is_set(ra_data->status, takeover_complete)) {
+ /* Delete pending ops whenever the remote connection is intentionally stopped */
+ g_hash_table_remove_all(lrm_state->active_ops);
+ } else {
+ /* We no longer hold the history if this connection has been migrated;
+ * however, we keep the metadata cache for future use */
+ lrm_state_reset_tables(lrm_state, FALSE);
+ }
+
+ lrm_remote_clear_flags(lrm_state, remote_active);
+ lrm_state_disconnect(lrm_state);
+
+ if (ra_data->cmds) {
+ g_list_free_full(ra_data->cmds, free_cmd);
+ }
+ if (ra_data->recurring_cmds) {
+ g_list_free_full(ra_data->recurring_cmds, free_cmd);
+ }
+ ra_data->cmds = NULL;
+ ra_data->recurring_cmds = NULL;
+ ra_data->cur_cmd = NULL;
+
+ if (cmd) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ }
+}
+
+// \return Standard Pacemaker return code
+static int
+handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
+{
+ const char *server = NULL;
+ lrmd_key_value_t *tmp = NULL;
+ int port = 0;
+ int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
+ int rc = pcmk_rc_ok;
+
+ for (tmp = cmd->params; tmp; tmp = tmp->next) {
+ if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
+ XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
+ server = tmp->value;
+ } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
+ port = atoi(tmp->value);
+ } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
+ lrm_remote_set_flags(lrm_state, controlling_guest);
+ }
+ }
+
+ rc = controld_connect_remote_executor(lrm_state, server, port,
+ timeout_used);
+ if (rc != pcmk_rc_ok) {
+ pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR,
+ "Could not connect to Pacemaker Remote node %s: %s",
+ lrm_state->node_name, pcmk_rc_str(rc));
+ }
+ return rc;
+}
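+
+/* Note on timeouts: capping each connection attempt at MAX_START_TIMEOUT_MS
+ * (10s) leaves room within the action's overall timeout for
+ * retry_start_cmd_cb() to schedule further attempts; e.g., a 60s start
+ * timeout permits several 10s connection attempts rather than one long one.
+ */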
+
+static gboolean
+handle_remote_ra_exec(gpointer user_data)
+{
+ int rc = 0;
+ lrm_state_t *lrm_state = user_data;
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ remote_ra_cmd_t *cmd;
+ GList *first = NULL;
+
+ if (ra_data->cur_cmd) {
+ /* still waiting on previous cmd */
+ return TRUE;
+ }
+
+ while (ra_data->cmds) {
+ first = ra_data->cmds;
+ cmd = first->data;
+ if (cmd->delay_id) {
+ /* still waiting for start delay timer to trip */
+ return TRUE;
+ }
+
+ ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
+ g_list_free_1(first);
+
+ if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
+ lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
+ if (handle_remote_ra_start(lrm_state, cmd,
+ cmd->timeout) == pcmk_rc_ok) {
+ /* take care of this later when we get async connection result */
+ crm_debug("Initiated async remote connection, %s action will complete after connect event",
+ cmd->action);
+ ra_data->cur_cmd = cmd;
+ return TRUE;
+ }
+ report_remote_ra_result(cmd);
+
+ } else if (!strcmp(cmd->action, "monitor")) {
+
+ if (lrm_state_is_connected(lrm_state) == TRUE) {
+ rc = lrm_state_poke_connection(lrm_state);
+ if (rc < 0) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, pcmk_strerror(rc));
+ }
+ } else {
+ rc = -1;
+ pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
+ PCMK_EXEC_DONE, "Remote connection inactive");
+ }
+
+ if (rc == 0) {
+ crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
+ cmd->rsc_id);
+ ra_data->cur_cmd = cmd;
+ cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
+ return TRUE;
+ }
+ report_remote_ra_result(cmd);
+
+ } else if (!strcmp(cmd->action, "stop")) {
+
+ if (pcmk_is_set(ra_data->status, expect_takeover)) {
+ /* Briefly wait on stop for the takeover event to occur. If the
+ * takeover event does not occur during the wait period, that's fine;
+ * it just means the remote node's lrm_status section will be cleared,
+ * requiring all resources running on the remote node to be explicitly
+ * re-detected via probe actions. If the takeover does occur
+ * successfully, we can leave the status section intact. */
+ cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
+ ra_data->cur_cmd = cmd;
+ return TRUE;
+ }
+
+ handle_remote_ra_stop(lrm_state, cmd);
+
+ } else if (!strcmp(cmd->action, "migrate_to")) {
+ lrm_remote_clear_flags(lrm_state, takeover_complete);
+ lrm_remote_set_flags(lrm_state, expect_takeover);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT, NULL)) {
+ /* Currently the only reloadable parameter is reconnect_interval,
+ * which is only used by the scheduler via the CIB, so reloads are a
+ * no-op.
+ *
+ * @COMPAT DC <2.1.0: We only need to check for "reload" in case
+ * we're in a rolling upgrade with a DC scheduling "reload" instead
+ * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
+ * so this would work for that purpose as well.
+ */
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ }
+
+ free_cmd(cmd);
+ }
+
+ return TRUE;
+}
+
+static void
+remote_ra_data_init(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = NULL;
+
+ if (lrm_state->remote_ra_data) {
+ return;
+ }
+
+ ra_data = calloc(1, sizeof(remote_ra_data_t));
+ ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
+ lrm_state->remote_ra_data = ra_data;
+}
+
+void
+remote_ra_cleanup(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ if (!ra_data) {
+ return;
+ }
+
+ if (ra_data->cmds) {
+ g_list_free_full(ra_data->cmds, free_cmd);
+ }
+
+ if (ra_data->recurring_cmds) {
+ g_list_free_full(ra_data->recurring_cmds, free_cmd);
+ }
+ mainloop_destroy_trigger(ra_data->work);
+ free(ra_data);
+ lrm_state->remote_ra_data = NULL;
+}
+
+gboolean
+is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
+{
+ if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
+ return TRUE;
+ }
+ if ((id != NULL) && (lrm_state_find(id) != NULL)
+ && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+lrmd_rsc_info_t *
+remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
+{
+ lrmd_rsc_info_t *info = NULL;
+
+ if ((lrm_state_find(rsc_id))) {
+ info = calloc(1, sizeof(lrmd_rsc_info_t));
+
+ info->id = strdup(rsc_id);
+ info->type = strdup(REMOTE_LRMD_RA);
+ info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
+ info->provider = strdup("pacemaker");
+ }
+
+ return info;
+}
+
+static gboolean
+is_remote_ra_supported_action(const char *action)
+{
+ return pcmk__str_any_of(action,
+ CRMD_ACTION_START,
+ CRMD_ACTION_STOP,
+ CRMD_ACTION_STATUS,
+ CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED,
+ CRMD_ACTION_RELOAD_AGENT,
+ CRMD_ACTION_RELOAD,
+ NULL);
+}
+
+static GList *
+fail_all_monitor_cmds(GList * list)
+{
+ GList *rm_list = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+ GList *gIter = NULL;
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ rm_list = g_list_append(rm_list, cmd);
+ }
+ }
+
+ for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, "Lost connection to remote executor");
+ crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
+ cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
+ report_remote_ra_result(cmd);
+
+ list = g_list_remove(list, cmd);
+ free_cmd(cmd);
+ }
+
+ /* frees only the list data, not the cmds */
+ g_list_free(rm_list);
+ return list;
+}
+
+static GList *
+remove_cmd(GList * list, const char *action, guint interval_ms)
+{
+ remote_ra_cmd_t *cmd = NULL;
+ GList *gIter = NULL;
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
+ break;
+ }
+ cmd = NULL;
+ }
+ if (cmd) {
+ list = g_list_remove(list, cmd);
+ free_cmd(cmd);
+ }
+ return list;
+}
+
+int
+remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms)
+{
+ lrm_state_t *connection_rsc = NULL;
+ remote_ra_data_t *ra_data = NULL;
+
+ connection_rsc = lrm_state_find(rsc_id);
+ if (!connection_rsc || !connection_rsc->remote_ra_data) {
+ return -EINVAL;
+ }
+
+ ra_data = connection_rsc->remote_ra_data;
+ ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
+ ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
+ interval_ms);
+ if (ra_data->cur_cmd &&
+ (ra_data->cur_cmd->interval_ms == interval_ms) &&
+ (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
+
+ cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
+ }
+
+ return 0;
+}
+
+static remote_ra_cmd_t *
+handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
+ const char *userdata)
+{
+ GList *gIter = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+
+ /* There are three places a potential duplicate monitor operation
+ * could exist:
+ * 1. the recurring_cmds list, where the op is waiting for its next interval
+ * 2. the cmds list, where the op is queued to get executed immediately
+ * 3. cur_cmd, which means the monitor op is in flight right now
+ */
+ if (interval_ms == 0) {
+ return NULL;
+ }
+
+ if (ra_data->cur_cmd &&
+ !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
+ (ra_data->cur_cmd->interval_ms == interval_ms) &&
+ pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
+
+ cmd = ra_data->cur_cmd;
+ goto handle_dup;
+ }
+
+ for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ goto handle_dup;
+ }
+ }
+
+ for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ goto handle_dup;
+ }
+ }
+
+ return NULL;
+
+handle_dup:
+
+ crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
+ cmd->rsc_id, "monitor", interval_ms);
+
+ /* update the userdata */
+ if (userdata) {
+ free(cmd->userdata);
+ cmd->userdata = strdup(userdata);
+ }
+
+ /* if we've already reported success, generate a new call id */
+ if (pcmk_is_set(cmd->status, cmd_reported_success)) {
+ cmd->start_time = time(NULL);
+ cmd->call_id = generate_callid();
+ cmd_clear_flags(cmd, cmd_reported_success);
+ }
+
+ /* If we have an interval_id set, we are in the process of waiting for
+ * this cmd's next interval. Instead of waiting, cancel the timer and
+ * execute the action immediately */
+ if (cmd->interval_id) {
+ g_source_remove(cmd->interval_id);
+ cmd->interval_id = 0;
+ recurring_helper(cmd);
+ }
+
+ return cmd;
+}
+
+/*!
+ * \internal
+ * \brief Execute an action using the (internal) ocf:pacemaker:remote agent
+ *
+ * \param[in] lrm_state Executor state object for remote connection
+ * \param[in] rsc_id Connection resource ID
+ * \param[in] action Action to execute
+ * \param[in] userdata String to copy and pass to execution callback
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in] timeout_ms Action timeout (in milliseconds)
+ * \param[in] start_delay_ms Delay (in milliseconds) before executing action
+ * \param[in,out] params Connection resource parameters
+ * \param[out] call_id Where to store call ID on success
+ *
+ * \return Standard Pacemaker return code
+ * \note This takes ownership of \p params, which should not be used or freed
+ * after calling this function.
+ */
+int
+controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, lrmd_key_value_t *params,
+ int *call_id)
+{
+ lrm_state_t *connection_rsc = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+ remote_ra_data_t *ra_data = NULL;
+
+ CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
+ && (userdata != NULL) && (call_id != NULL),
+ lrmd_key_value_freeall(params); return EINVAL);
+
+ *call_id = 0; // Safe to dereference only after the NULL check above
+
+ if (!is_remote_ra_supported_action(action)) {
+ lrmd_key_value_freeall(params);
+ return EOPNOTSUPP;
+ }
+
+ connection_rsc = lrm_state_find(rsc_id);
+ if (connection_rsc == NULL) {
+ lrmd_key_value_freeall(params);
+ return ENOTCONN;
+ }
+
+ remote_ra_data_init(connection_rsc);
+ ra_data = connection_rsc->remote_ra_data;
+
+ cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
+ if (cmd) {
+ *call_id = cmd->call_id;
+ lrmd_key_value_freeall(params);
+ return pcmk_rc_ok;
+ }
+
+ cmd = calloc(1, sizeof(remote_ra_cmd_t));
+ if (cmd == NULL) {
+ lrmd_key_value_freeall(params);
+ return ENOMEM;
+ }
+
+ cmd->owner = strdup(lrm_state->node_name);
+ cmd->rsc_id = strdup(rsc_id);
+ cmd->action = strdup(action);
+ cmd->userdata = strdup(userdata);
+ if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
+ || (cmd->userdata == NULL)) {
+ free_cmd(cmd);
+ lrmd_key_value_freeall(params);
+ return ENOMEM;
+ }
+
+ cmd->interval_ms = interval_ms;
+ cmd->timeout = timeout_ms;
+ cmd->start_delay = start_delay_ms;
+ cmd->params = params;
+ cmd->start_time = time(NULL);
+
+ cmd->call_id = generate_callid();
+
+ if (cmd->start_delay) {
+ cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+ }
+
+ ra_data->cmds = g_list_append(ra_data->cmds, cmd);
+ mainloop_set_trigger(ra_data->work);
+
+ *call_id = cmd->call_id;
+ return pcmk_rc_ok;
+}
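+
+/* A hedged usage sketch (illustrative; the resource name and timing values
+ * are assumptions): scheduling a 30s-interval monitor on a connection
+ * resource might look like
+ *
+ *     int call_id = 0;
+ *     int rc = controld_execute_remote_agent(lrm_state, "remote1", "monitor",
+ *                                            user_data, 30000, 20000, 0,
+ *                                            params, &call_id);
+ *
+ * On pcmk_rc_ok, the result arrives asynchronously via
+ * report_remote_ra_result(); params must not be used or freed afterward.
+ */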
+
+/*!
+ * \internal
+ * \brief Immediately fail all monitors of a remote node, if proxied here
+ *
+ * \param[in] node_name Name of pacemaker_remote node
+ */
+void
+remote_ra_fail(const char *node_name)
+{
+ lrm_state_t *lrm_state = lrm_state_find(node_name);
+
+ if (lrm_state && lrm_state_is_connected(lrm_state)) {
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+ }
+}
+
+/* A guest node fencing implied by host fencing looks like:
+ *
+ * <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
+ * on_node="lxc1" on_node_uuid="lxc1">
+ * <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
+ * CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
+ * <downed>
+ * <node id="lxc1"/>
+ * </downed>
+ * </pseudo_event>
+ */
+#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \
+ "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
+ "/" XML_CIB_TAG_NODE
+
+/*!
+ * \internal
+ * \brief Check a pseudo-action for Pacemaker Remote node side effects
+ *
+ * \param[in,out] xml XML of pseudo-action to check
+ */
+void
+remote_ra_process_pseudo(xmlNode *xml)
+{
+ xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
+
+ if (numXpathResults(search) == 1) {
+ xmlNode *result = getXpathResult(search, 0);
+
+ /* Normally, we handle the necessary side effects of a guest node stop
+ * action when reporting the remote agent's result. However, if the stop
+ * is implied due to fencing, it will be a fencing pseudo-event, and
+ * there won't be a result to report. Handle that case here.
+ *
+ * This will result in a duplicate call to remote_node_down() if the
+ * guest stop was real instead of implied, but that shouldn't hurt.
+ *
+ * There is still one corner case that isn't handled: if a guest node
+ * isn't running any resources when its host is fenced, it will appear
+ * to be cleanly stopped, so there will be no pseudo-fence, and our
+ * peer cache state will be incorrect unless and until the guest is
+ * recovered.
+ */
+ if (result) {
+ const char *remote = ID(result);
+
+ if (remote) {
+ remote_node_down(remote, DOWN_ERASE_LRM);
+ }
+ }
+ }
+ freeXpathObject(search);
+}
+
+static void
+remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
+{
+ xmlNode *update, *state;
+ int call_opt;
+ crm_node_t *node;
+
+ call_opt = crmd_cib_smart_opt();
+ node = crm_remote_peer_get(lrm_state->node_name);
+ CRM_CHECK(node != NULL, return);
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ state = create_node_state_update(node, node_update_none, update,
+ __func__);
+ crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
+ if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt,
+ NULL) == pcmk_rc_ok) {
+ /* TODO: still not 100% sure that async update will succeed ... */
+ if (maintenance) {
+ lrm_remote_set_flags(lrm_state, remote_in_maint);
+ } else {
+ lrm_remote_clear_flags(lrm_state, remote_in_maint);
+ }
+ }
+ free_xml(update);
+}
+
+#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
+ "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
+ XML_GRAPH_TAG_MAINTENANCE
+
+/*!
+ * \internal
+ * \brief Check a pseudo-action holding updates for maintenance state
+ *
+ * \param[in,out] xml XML of pseudo-action to check
+ */
+void
+remote_ra_process_maintenance_nodes(xmlNode *xml)
+{
+ xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
+
+ if (numXpathResults(search) == 1) {
+ xmlNode *node;
+ int cnt = 0, cnt_remote = 0;
+
+ for (node =
+ first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
+ node != NULL; node = pcmk__xml_next(node)) {
+ lrm_state_t *lrm_state = lrm_state_find(ID(node));
+
+ cnt++;
+ if (lrm_state && lrm_state->remote_ra_data &&
+ pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
+ int is_maint;
+
+ cnt_remote++;
+ pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
+ &is_maint, 0);
+ remote_ra_maintenance(lrm_state, is_maint);
+ }
+ }
+ crm_trace("Action holds %d nodes (%d remotes found) "
+ "adjusting maintenance-mode", cnt, cnt_remote);
+ }
+ freeXpathObject(search);
+}
+
+gboolean
+remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ return pcmk_is_set(ra_data->status, remote_in_maint);
+}
+
+gboolean
+remote_ra_controlling_guest(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ return pcmk_is_set(ra_data->status, controlling_guest);
+}
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
new file mode 100644
index 0000000..912f9a5
--- /dev/null
+++ b/daemons/controld/controld_schedulerd.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <unistd.h> /* pid_t, sleep, ssize_t */
+
+#include <crm/cib.h>
+#include <crm/cluster.h>
+#include <crm/common/xml.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_schedulerd.h>
+
+#include <pacemaker-controld.h>
+
+static void handle_disconnect(void);
+
+static pcmk_ipc_api_t *schedulerd_api = NULL;
+
+/*!
+ * \internal
+ * \brief Close any scheduler connection and free associated memory
+ */
+void
+controld_shutdown_schedulerd_ipc(void)
+{
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+
+ pcmk_free_ipc_api(schedulerd_api);
+ schedulerd_api = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Save CIB query result to file, raising FSA error
+ *
+ * \param[in] msg Ignored
+ * \param[in] call_id Call ID of CIB query
+ * \param[in] rc Return code of CIB query
+ * \param[in,out] output Result of CIB query
+ * \param[in] user_data Unique identifier for filename
+ *
+ * \note This is intended to be called after a scheduler connection fails.
+ */
+static void
+save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ const char *id = user_data;
+
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ CRM_CHECK(id != NULL, return);
+
+ if (rc == pcmk_ok) {
+ char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
+
+ if (write_xml_file(output, filename, TRUE) < 0) {
+ crm_err("Could not save Cluster Information Base to %s after scheduler crash",
+ filename);
+ } else {
+ crm_notice("Saved Cluster Information Base to %s after scheduler crash",
+ filename);
+ }
+ free(filename);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Respond to scheduler connection failure
+ */
+static void
+handle_disconnect(void)
+{
+ // If we aren't connected to the scheduler, we can't expect a reply
+ controld_expect_sched_reply(NULL);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
+ int rc = pcmk_ok;
+ char *uuid_str = crm_generate_uuid();
+
+ crm_crit("Connection to the scheduler failed "
+ CRM_XS " uuid=%s", uuid_str);
+
+ /* The scheduler died...
+ *
+ * Save the current CIB so that we have a chance of figuring out
+ * what killed it.
+ *
+ * Delay raising the I_ERROR until the query below completes or
+ * 5s is up, whichever comes first.
+ */
+ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
+ NULL, NULL,
+ cib_scope_local);
+ fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
+
+ } else {
+ crm_info("Connection to the scheduler released");
+ }
+
+ controld_clear_fsa_input_flags(R_PE_CONNECTED);
+ controld_trigger_fsa();
+ return;
+}
+
+static void
+handle_reply(pcmk_schedulerd_api_reply_t *reply)
+{
+ const char *msg_ref = NULL;
+
+ if (!AM_I_DC) {
+ return;
+ }
+
+ msg_ref = reply->data.graph.reference;
+
+ if (msg_ref == NULL) {
+ crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
+
+ } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
+ pcmk__str_none)) {
+ ha_msg_input_t fsa_input;
+ xmlNode *crm_data_node;
+
+ controld_stop_sched_timer();
+
+ /* do_te_invoke (which will eventually process the fsa_input we are constructing
+ * here) requires that fsa_input.xml be non-NULL. That will only happen if
+ * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
+ * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
+ * gave us the values we need, we just need to put them into XML.
+ *
+ * The name of the top level element here is irrelevant. Nothing checks it.
+ */
+ fsa_input.msg = create_xml_node(NULL, "dummy-reply");
+ crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref);
+ crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input);
+
+ crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA);
+ add_node_copy(crm_data_node, reply->data.graph.tgraph);
+ register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
+
+ free_xml(fsa_input.msg);
+
+ } else {
+ crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
+ }
+}
+
+static void
+scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
+ crm_exit_t status, void *event_data, void *user_data)
+{
+ pcmk_schedulerd_api_reply_t *reply = event_data;
+
+ switch (event_type) {
+ case pcmk_ipc_event_disconnect:
+ handle_disconnect();
+ break;
+
+ case pcmk_ipc_event_reply:
+ handle_reply(reply);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+new_schedulerd_ipc_connection(void)
+{
+ int rc;
+
+ controld_set_fsa_input_flags(R_PE_REQUIRED);
+
+ if (schedulerd_api == NULL) {
+ rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
+
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+ }
+
+ pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
+
+ rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main);
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+
+ controld_set_fsa_input_flags(R_PE_CONNECTED);
+ return true;
+}
+
+static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
+ xmlNode *output, void *user_data);
+
+/* A_PE_START, A_PE_STOP, O_PE_RESTART */
+void
+do_pe_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (pcmk_is_set(action, A_PE_STOP)) {
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+ }
+ if (pcmk_is_set(action, A_PE_START)
+ && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+
+ if (cur_state == S_STOPPING) {
+ crm_info("Ignoring request to connect to scheduler while shutting down");
+
+ } else if (!new_schedulerd_ipc_connection()) {
+ crm_warn("Could not connect to scheduler");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ }
+}
+
+static int fsa_pe_query = 0;
+static mainloop_timer_t *controld_sched_timer = NULL;
+
+// @TODO Make this a configurable cluster option if there's demand for it
+#define SCHED_TIMEOUT_MS (120000)
+
+/*!
+ * \internal
+ * \brief Handle a timeout waiting for scheduler reply
+ *
+ * \param[in] user_data Ignored
+ *
+ * \return FALSE (indicating that timer should not be restarted)
+ */
+static gboolean
+controld_sched_timeout(gpointer user_data)
+{
+ if (AM_I_DC) {
+ /* If this node is the DC but can't communicate with the scheduler, just
+ * exit (and likely get fenced) so this node doesn't interfere with any
+ * further DC elections.
+ *
+ * @TODO We could try something less drastic first, like disconnecting
+ * and reconnecting to the scheduler, but something is likely going
+ * seriously wrong, so perhaps it's better to just fail as quickly as
+ * possible.
+ */
+ crmd_exit(CRM_EX_FATAL);
+ }
+ return FALSE;
+}
+
+void
+controld_stop_sched_timer(void)
+{
+ if ((controld_sched_timer != NULL)
+ && (controld_globals.fsa_pe_ref != NULL)) {
+ crm_trace("Stopping timer for scheduler reply %s",
+ controld_globals.fsa_pe_ref);
+ }
+ mainloop_timer_stop(controld_sched_timer);
+}
+
+/*!
+ * \internal
+ * \brief Set the scheduler request currently being waited on
+ *
+ * \param[in] ref Request to expect reply to (or NULL for none)
+ *
+ * \note This function takes ownership of \p ref.
+ */
+void
+controld_expect_sched_reply(char *ref)
+{
+ if (ref) {
+ if (controld_sched_timer == NULL) {
+ controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
+ SCHED_TIMEOUT_MS, FALSE,
+ controld_sched_timeout,
+ NULL);
+ }
+ mainloop_timer_start(controld_sched_timer);
+ } else {
+ controld_stop_sched_timer();
+ }
+ free(controld_globals.fsa_pe_ref);
+ controld_globals.fsa_pe_ref = ref;
+}
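+
+/* Illustrative only: because this function takes ownership of ref, callers
+ * pass a freshly allocated string and must not free it themselves, e.g.
+ *
+ *     controld_expect_sched_reply(crm_strdup_printf("pe_calc-%s", uuid));
+ *
+ * (the format string is an assumption); passing NULL stops the reply timer.
+ */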
+
+/*!
+ * \internal
+ * \brief Free the scheduler reply timer
+ */
+void
+controld_free_sched_timer(void)
+{
+ if (controld_sched_timer != NULL) {
+ mainloop_timer_del(controld_sched_timer);
+ controld_sched_timer = NULL;
+ }
+}
+
+/* A_PE_INVOKE */
+void
+do_pe_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ if (AM_I_DC == FALSE) {
+ crm_err("Not invoking scheduler because not DC: %s",
+ fsa_action2string(action));
+ return;
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Cannot shut down gracefully without the scheduler");
+ register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
+
+ } else {
+ crm_info("Waiting for the scheduler to connect");
+ crmd_fsa_stall(FALSE);
+ controld_set_fsa_action_flags(A_PE_START);
+ controld_trigger_fsa();
+ }
+ return;
+ }
+
+ if (cur_state != S_POLICY_ENGINE) {
+ crm_notice("Not invoking scheduler because in state %s",
+ fsa_state2string(cur_state));
+ return;
+ }
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
+
+ /* start the join from scratch */
+ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
+ return;
+ }
+
+ fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local);
+
+ crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
+ fsa_state2string(controld_globals.fsa_state));
+
+ controld_expect_sched_reply(NULL);
+ fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
+}
+
+static void
+force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
+{
+ int max = 0;
+ int lpc = 0;
+ const char *xpath_base = NULL;
+ char *xpath_string = NULL;
+ xmlXPathObjectPtr xpathObj = NULL;
+
+ xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG);
+ if (xpath_base == NULL) {
+ crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)");
+ return;
+ }
+
+ xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
+ xpath_base, XML_CIB_TAG_PROPSET,
+ attr_name);
+ xpathObj = xpath_search(xml, xpath_string);
+ max = numXpathResults(xpathObj);
+ free(xpath_string);
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+ crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
+ crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+
+ if(max == 0) {
+ xmlNode *configuration = NULL;
+ xmlNode *crm_config = NULL;
+ xmlNode *cluster_property_set = NULL;
+
+ crm_trace("Creating %s-%s for %s=%s",
+ CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
+
+ configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
+ NULL);
+ if (configuration == NULL) {
+ configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
+ }
+
+ crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
+ NULL);
+ if (crm_config == NULL) {
+ crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
+ }
+
+ cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
+ NULL, NULL);
+ if (cluster_property_set == NULL) {
+ cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
+ crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
+ }
+
+ xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
+
+ crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+ freeXpathObject(xpathObj);
+}
+
+static void
+do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ char *ref = NULL;
+ pid_t watchdog = pcmk__locate_sbd();
+
+ if (rc != pcmk_ok) {
+ crm_err("Could not retrieve the Cluster Information Base: %s "
+ CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ return;
+
+ } else if (call_id != fsa_pe_query) {
+ crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
+ return;
+
+ } else if (!AM_I_DC
+ || !pcmk_is_set(controld_globals.fsa_input_register,
+ R_PE_CONNECTED)) {
+ crm_debug("No need to invoke the scheduler anymore");
+ return;
+
+ } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
+ crm_debug("Discarding scheduler request in state: %s",
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+
+ /* this callback counts as 1 */
+ } else if (num_cib_op_callbacks() > 1) {
+ crm_debug("Re-asking for the CIB: %d other peer updates still pending",
+ (num_cib_op_callbacks() - 1));
+ sleep(1);
+ controld_set_fsa_action_flags(A_PE_INVOKE);
+ controld_trigger_fsa();
+ return;
+ }
+
+ CRM_LOG_ASSERT(output != NULL);
+
+ /* Refresh the remote node cache and the known node cache when the
+ * scheduler is invoked */
+ pcmk__refresh_node_caches_from_cib(output);
+
+ crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid);
+ pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM,
+ pcmk_is_set(controld_globals.flags,
+ controld_has_quorum));
+
+ force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
+
+ if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
+ && !crm_have_quorum) {
+ crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
+ }
+
+ rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
+
+ if (rc < 0) {
+ crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ } else {
+ CRM_ASSERT(ref != NULL);
+ controld_expect_sched_reply(ref);
+ crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
+ "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref,
+ crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags,
+ controld_has_quorum)));
+ }
+}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
new file mode 100644
index 0000000..d8cfcad
--- /dev/null
+++ b/daemons/controld/controld_te_actions.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+static GHashTable *te_targets = NULL;
+void send_rsc_command(pcmk__graph_action_t *action);
+static void te_update_job_count(pcmk__graph_action_t *action, int offset);
+
+static void
+te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ action->timer = g_timeout_add(action->timeout + graph->network_delay,
+ action_timer_callback, (void *) action);
+ CRM_ASSERT(action->timer != 0);
+}
+
+/*!
+ * \internal
+ * \brief Execute a graph pseudo-action
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] pseudo Pseudo-action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
+{
+ const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
+
+ /* send to peers as well? */
+ if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *cmd = NULL;
+
+ if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
+ pcmk__str_casei)) {
+ continue;
+ }
+
+ cmd = create_request(task, pseudo->xml, node->uname,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+ send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
+ free_xml(cmd);
+ }
+
+ remote_ra_process_maintenance_nodes(pseudo->xml);
+ } else {
+ /* Check action for Pacemaker Remote node side effects */
+ remote_ra_process_pseudo(pseudo->xml);
+ }
+
+ crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
+ crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
+ te_action_confirmed(pseudo, graph);
+ return pcmk_rc_ok;
+}
+
+static int
+get_target_rc(pcmk__graph_action_t *action)
+{
+ int exit_status;
+
+ pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
+ &exit_status, 0);
+ return exit_status;
+}
+
+/*!
+ * \internal
+ * \brief Execute a cluster action from a transition graph
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] action Cluster action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ char *counter = NULL;
+ xmlNode *cmd = NULL;
+ gboolean is_local = FALSE;
+
+ const char *id = NULL;
+ const char *task = NULL;
+ const char *value = NULL;
+ const char *on_node = NULL;
+ const char *router_node = NULL;
+
+ gboolean rc = TRUE;
+ gboolean no_wait = FALSE;
+
+ id = ID(action->xml);
+ CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
+
+ on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
+
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router_node == NULL) {
+ router_node = on_node;
+ if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
+ const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
+
+ if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
+ router_node = controld_globals.our_nodename;
+ }
+ }
+ }
+
+ if (pcmk__str_eq(router_node, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ is_local = TRUE;
+ }
+
+ value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
+ if (crm_is_true(value)) {
+ no_wait = TRUE;
+ }
+
+ crm_info("Handling controller request '%s' (%s on %s)%s%s",
+ id, task, on_node, (is_local? " locally" : ""),
+ (no_wait? " without waiting" : ""));
+
+ if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
+ /* defer until everything else completes */
+ crm_info("Controller request '%s' is a local shutdown", id);
+ graph->completion_action = pcmk__graph_shutdown;
+ graph->abort_reason = "local shutdown";
+ te_action_confirmed(action, graph);
+ return pcmk_rc_ok;
+
+ } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
+ crm_node_t *peer = crm_get_peer(0, router_node);
+
+ pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
+ }
+
+ cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+
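+ /* counter is a transition key of the form
+ * <action>:<transition>:<expected rc>:<transitioner UUID>, e.g.
+ * "9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" (compare the sample
+ * lrm_rsc_op shown in controld_te_events.c below)
+ */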
+ counter = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, get_target_rc(action),
+ controld_globals.te_uuid);
+ crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
+
+ rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
+ free(counter);
+ free_xml(cmd);
+
+ if (rc == FALSE) {
+ crm_err("Action %d failed: send", action->id);
+ return ECOMM;
+
+ } else if (no_wait) {
+ te_action_confirmed(action, graph);
+
+ } else {
+ if (action->timeout <= 0) {
+ crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
+ action->id, task, on_node, action->timeout, graph->network_delay);
+ action->timeout = (int) graph->network_delay;
+ }
+ te_start_action_timer(graph, action);
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Synthesize an executor event for a resource action timeout
+ *
+ * \param[in] action Resource action that timed out
+ * \param[in] target_rc Expected result of action that timed out
+ *
+ * Synthesize an executor event for a resource action timeout. (If the executor
+ * gets a timeout while waiting for a resource action to complete, that will be
+ * reported via the usual callback. This timeout means we didn't hear from the
+ * executor itself or the controller that relayed the action to the executor.)
+ *
+ * \return Newly created executor event for result of \p action
+ * \note The caller is responsible for freeing the return value using
+ * lrmd_free_event().
+ */
+static lrmd_event_data_t *
+synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *reason = NULL;
+ char *dynamic_reason = NULL;
+
+ if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
+ reason = "Local executor did not return result in time";
+ } else {
+ const char *router_node = NULL;
+
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router_node == NULL) {
+ router_node = target;
+ }
+ dynamic_reason = crm_strdup_printf("Controller on %s did not return "
+ "result in time", router_node);
+ reason = dynamic_reason;
+ }
+
+ op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
+ PCMK_OCF_UNKNOWN_ERROR, reason);
+ op->call_id = -1;
+ op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, target_rc,
+ controld_globals.te_uuid);
+ free(dynamic_reason);
+ return op;
+}
+
+static void
+controld_record_action_event(pcmk__graph_action_t *action,
+ lrmd_event_data_t *op)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ xmlNode *state = NULL;
+ xmlNode *rsc = NULL;
+ xmlNode *action_rsc = NULL;
+
+ int rc = pcmk_ok;
+
+ const char *rsc_id = NULL;
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+
+ int target_rc = get_target_rc(action);
+
+ action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
+ if (action_rsc == NULL) {
+ return;
+ }
+
+ rsc_id = ID(action_rsc);
+ CRM_CHECK(rsc_id != NULL,
+ crm_log_xml_err(action->xml, "Bad:action"); return);
+
+ /* Update the CIB with XML like:
+ *
+ * <node_state id="hadev">
+ *   <lrm>
+ *     <lrm_resources>
+ *       <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
+ */
+
+ state = create_xml_node(NULL, XML_CIB_TAG_STATE);
+
+ crm_xml_add(state, XML_ATTR_ID, target_uuid);
+ crm_xml_add(state, XML_ATTR_UNAME, target);
+
+ rsc = create_xml_node(state, XML_CIB_TAG_LRM);
+ crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
+
+ rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
+ rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
+ crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
+
+
+ crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
+ crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
+ crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
+
+ pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
+ __func__);
+
+ rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
+ cib_scope_local);
+ fsa_register_cib_callback(rc, NULL, cib_action_updated);
+ free_xml(state);
+
+ crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
+ rc, action->id, task_uuid, target);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
+}
+
+void
+controld_record_action_timeout(pcmk__graph_action_t *action)
+{
+ lrmd_event_data_t *op = NULL;
+
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+
+ int target_rc = get_target_rc(action);
+
+ crm_warn("%s %d: %s on %s timed out",
+ crm_element_name(action->xml), action->id, task_uuid, target);
+
+ op = synthesize_timeout_event(action, target_rc);
+ controld_record_action_event(action, op);
+ lrmd_free_event(op);
+}
+
+/*!
+ * \internal
+ * \brief Execute a resource action from a transition graph
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] action Resource action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ /* never overwrite stop actions in the CIB with
+ * anything other than completed results
+ *
+ * Writing pending stops makes it look like the
+ * resource is running again
+ */
+ xmlNode *cmd = NULL;
+ xmlNode *rsc_op = NULL;
+
+ gboolean rc = TRUE;
+ gboolean no_wait = FALSE;
+ gboolean is_local = FALSE;
+
+ char *counter = NULL;
+ const char *task = NULL;
+ const char *value = NULL;
+ const char *on_node = NULL;
+ const char *router_node = NULL;
+ const char *task_uuid = NULL;
+
+ CRM_ASSERT(action != NULL);
+ CRM_ASSERT(action->xml != NULL);
+
+ pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
+
+ rsc_op = action->xml;
+ task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
+ on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+
+ CRM_CHECK(!pcmk__str_empty(on_node),
+ crm_err("Corrupted command(id=%s) %s: no node",
+ ID(action->xml), pcmk__s(task, "without task"));
+ return pcmk_rc_node_unknown);
+
+ task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
+
+ if (!router_node) {
+ router_node = on_node;
+ }
+
+ counter = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, get_target_rc(action),
+ controld_globals.te_uuid);
+ crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
+
+ if (pcmk__str_eq(router_node, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ is_local = TRUE;
+ }
+
+ value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
+ if (crm_is_true(value)) {
+ no_wait = TRUE;
+ }
+
+ crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
+ task, task_uuid, (is_local? " locally" : ""), on_node,
+ (no_wait? " without waiting" : ""), action->id);
+
+ cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
+ CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
+
+ if (is_local) {
+ /* shortcut local resource commands */
+ ha_msg_input_t data = {
+ .msg = cmd,
+ .xml = rsc_op,
+ };
+
+ fsa_data_t msg = {
+ .id = 0,
+ .data = &data,
+ .data_type = fsa_dt_ha_msg,
+ .fsa_input = I_NULL,
+ .fsa_cause = C_FSA_INTERNAL,
+ .actions = A_LRM_INVOKE,
+ .origin = __func__,
+ };
+
+ do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
+ I_NULL, &msg);
+
+ } else {
+ rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
+ }
+
+ free(counter);
+ free_xml(cmd);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
+
+ if (rc == FALSE) {
+ crm_err("Action %d failed: send", action->id);
+ return ECOMM;
+
+ } else if (no_wait) {
+ /* Just mark confirmed. Don't bump the job count only to immediately
+ * decrement it.
+ */
+ crm_info("Action %d confirmed - no wait", action->id);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__update_graph(controld_globals.transition_graph, action);
+ trigger_graph();
+
+ } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
+ action->id, task, task_uuid, on_node, action->timeout);
+ } else {
+ if (action->timeout <= 0) {
+ crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
+ action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
+ action->timeout = (int) graph->network_delay;
+ }
+ te_update_job_count(action, 1);
+ te_start_action_timer(graph, action);
+ }
+
+ return pcmk_rc_ok;
+}
+
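+ // Per-node counts of in-flight resource jobs (total and migrations),
+ // used to enforce each node's job limit and the migration limit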
+struct te_peer_s
+{
+ char *name;
+ int jobs;
+ int migrate_jobs;
+};
+
+static void te_peer_free(gpointer p)
+{
+ struct te_peer_s *peer = p;
+
+ free(peer->name);
+ free(peer);
+}
+
+void te_reset_job_counts(void)
+{
+ GHashTableIter iter;
+ struct te_peer_s *peer = NULL;
+
+ if(te_targets == NULL) {
+ te_targets = pcmk__strkey_table(NULL, te_peer_free);
+ }
+
+ g_hash_table_iter_init(&iter, te_targets);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
+ peer->jobs = 0;
+ peer->migrate_jobs = 0;
+ }
+}
+
+static void
+te_update_job_count_on(const char *target, int offset, bool migrate)
+{
+ struct te_peer_s *r = NULL;
+
+ if(target == NULL || te_targets == NULL) {
+ return;
+ }
+
+ r = g_hash_table_lookup(te_targets, target);
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct te_peer_s));
+ r->name = strdup(target);
+ g_hash_table_insert(te_targets, r->name, r);
+ }
+
+ r->jobs += offset;
+ if(migrate) {
+ r->migrate_jobs += offset;
+ }
+ crm_trace("jobs[%s] = %d", target, r->jobs);
+}
+
+static void
+te_update_job_count(pcmk__graph_action_t *action, int offset)
+{
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+
+ if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
+ /* No limit on these */
+ return;
+ }
+
+ /* If the action has a router node, it is being executed on a remote node.
+ * For now, we count all actions occurring on a remote node against the
+ * job list of the cluster node hosting the connection resource */
+ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+
+ const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
+ const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
+
+ te_update_job_count_on(t1, offset, TRUE);
+ te_update_job_count_on(t2, offset, TRUE);
+ return;
+ } else if (target == NULL) {
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ }
+
+ te_update_job_count_on(target, offset, FALSE);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a graph action is allowed to be executed on a node
+ *
+ * \param[in] graph Transition graph being executed
+ * \param[in] action Graph action being executed
+ * \param[in] target Name of node where action should be executed
+ *
+ * \return true if action is allowed, otherwise false
+ */
+static bool
+allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
+ const char *target)
+{
+ int limit = 0;
+ struct te_peer_s *r = NULL;
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+
+ if(target == NULL) {
+ /* No limit on these */
+ return true;
+
+ } else if(te_targets == NULL) {
+ return false;
+ }
+
+ r = g_hash_table_lookup(te_targets, target);
+ limit = throttle_get_job_limit(target);
+
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct te_peer_s));
+ r->name = strdup(target);
+ g_hash_table_insert(te_targets, r->name, r);
+ }
+
+ if(limit <= r->jobs) {
+ crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
+ target, limit, r->jobs, id);
+ return false;
+
+ } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
+ if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
+ crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
+ target, graph->migration_limit, r->migrate_jobs, id);
+ return false;
+ }
+ }
+
+ crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a graph action is allowed to be executed
+ *
+ * \param[in] graph Transition graph being executed
+ * \param[in] action Graph action being executed
+ *
+ * \return true if action is allowed, otherwise false
+ */
+static bool
+graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *target = NULL;
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+
+ if (action->type != pcmk__rsc_graph_action) {
+ /* No limit on these */
+ return true;
+ }
+
+ /* If the action has a router node, it is being executed on a remote node.
+ * For now, we count all actions occurring on a remote node against the
+ * job list of the cluster node hosting the connection resource */
+ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+ target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
+ if (!allowed_on_node(graph, action, target)) {
+ return false;
+ }
+
+ target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
+
+ } else if (target == NULL) {
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ }
+
+ return allowed_on_node(graph, action, target);
+}
+
+/*!
+ * \brief Confirm a graph action (and optionally update graph)
+ *
+ * \param[in,out] action Action to confirm
+ * \param[in,out] graph Update and trigger this graph (if non-NULL)
+ */
+void
+te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
+{
+ if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type == pcmk__rsc_graph_action)
+ && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
+ te_update_job_count(action, -1);
+ }
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ }
+ if (graph) {
+ pcmk__update_graph(graph, action);
+ trigger_graph();
+ }
+}
+
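+ /* Transitioner implementations of the libpacemaker graph executor
+ * callbacks, in pcmk__graph_functions_t member order: pseudo-action,
+ * resource action, cluster action, fencing action, and the
+ * action-allowed check
+ */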
+static pcmk__graph_functions_t te_graph_fns = {
+ execute_pseudo_action,
+ execute_rsc_action,
+ execute_cluster_action,
+ controld_execute_fence_action,
+ graph_action_allowed,
+};
+
+/*!
+ * \internal
+ * \brief Register the transitioner's graph functions with \p libpacemaker
+ */
+void
+controld_register_graph_functions(void)
+{
+ pcmk__set_graph_functions(&te_graph_fns);
+}
+
+void
+notify_crmd(pcmk__graph_t *graph)
+{
+ const char *type = "unknown";
+ enum crmd_fsa_input event = I_NULL;
+
+ crm_debug("Processing transition completion in state %s",
+ fsa_state2string(controld_globals.fsa_state));
+
+ CRM_CHECK(graph->complete, graph->complete = true);
+
+ switch (graph->completion_action) {
+ case pcmk__graph_wait:
+ type = "stop";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ event = I_TE_SUCCESS;
+ }
+ break;
+ case pcmk__graph_done:
+ type = "done";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ event = I_TE_SUCCESS;
+ }
+ break;
+
+ case pcmk__graph_restart:
+ type = "restart";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ if (controld_get_period_transition_timer() > 0) {
+ controld_stop_transition_timer();
+ controld_start_transition_timer();
+ } else {
+ event = I_PE_CALC;
+ }
+
+ } else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
+ controld_set_fsa_action_flags(A_PE_INVOKE);
+ controld_trigger_fsa();
+ }
+ break;
+
+ case pcmk__graph_shutdown:
+ type = "shutdown";
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ event = I_STOP;
+
+ } else {
+ crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
+ event = I_TERMINATE;
+ }
+ }
+
+ crm_debug("Transition %d status: %s - %s", graph->id, type,
+ pcmk__s(graph->abort_reason, "unspecified reason"));
+
+ graph->abort_reason = NULL;
+ graph->completion_action = pcmk__graph_done;
+
+ if (event != I_NULL) {
+ register_fsa_input(C_FSA_INTERNAL, event, NULL);
+ } else {
+ controld_trigger_fsa();
+ }
+}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
new file mode 100644
index 0000000..cf9de83
--- /dev/null
+++ b/daemons/controld/controld_te_callbacks.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster.h> /* For ONLINESTATUS etc */
+
+#include <pacemaker-controld.h>
+
+void te_update_confirm(const char *event, xmlNode * msg);
+
+#define RSC_OP_PREFIX "//" XML_TAG_DIFF_ADDED "//" XML_TAG_CIB \
+ "//" XML_LRM_TAG_RSC_OP "[@" XML_ATTR_ID "='"
+
+// An explicit shutdown-lock of 0 means the lock has been cleared
+static bool
+shutdown_lock_cleared(xmlNode *lrm_resource)
+{
+ time_t shutdown_lock = 0;
+
+ return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ &shutdown_lock) == pcmk_ok)
+ && (shutdown_lock == 0);
+}
+
+static void
+te_update_diff_v1(const char *event, xmlNode *diff)
+{
+ int lpc, max;
+ xmlXPathObject *xpathObj = NULL;
+ GString *rsc_op_xpath = NULL;
+
+ CRM_CHECK(diff != NULL, return);
+
+ pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE);
+ controld_globals.logger_out->message(controld_globals.logger_out,
+ "xml-patchset", diff);
+
+ if (cib__config_changed_v1(NULL, NULL, &diff)) {
+ abort_transition(INFINITY, pcmk__graph_restart, "Non-status change",
+ diff);
+ goto bail; /* configuration changed */
+ }
+
+ /* Tickets Attributes - Added/Updated */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute: update", aborted);
+ goto bail;
+
+ }
+ freeXpathObject(xpathObj);
+
+ /* Tickets Attributes - Removed */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute: removal", aborted);
+ goto bail;
+ }
+ freeXpathObject(xpathObj);
+
+ /* Transient Attributes - Removed */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
+ XML_TAG_TRANSIENT_NODEATTRS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Transient attribute: removal", aborted);
+ goto bail;
+
+ }
+ freeXpathObject(xpathObj);
+
+ // Check for lrm_resource entries
+ xpathObj = xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT
+ "//" XML_TAG_DIFF_ADDED
+ "//" XML_LRM_TAG_RESOURCE);
+ max = numXpathResults(xpathObj);
+
+ /*
+ * Updates by, or in response to, graph actions will never affect more than
+ * one resource at a time, so an update that touches multiple resources
+ * indicates an LRM history refresh. In that case, start a new transition
+ * rather than checking each result individually, which can yield _huge_
+ * speedups in large clusters.
+ *
+ * Unfortunately, we can only do so when there are no pending actions.
+ * Otherwise, we could mistakenly throw away those results here, and
+ * the cluster would stall waiting for them and time out the operation.
+ */
+ if ((controld_globals.transition_graph->pending == 0) && (max > 1)) {
+ crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
+ max);
+ crm_log_xml_trace(diff, "lrm-refresh");
+ abort_transition(INFINITY, pcmk__graph_restart, "History refresh",
+ NULL);
+ goto bail;
+ }
+
+ if (max == 1) {
+ xmlNode *lrm_resource = getXpathResult(xpathObj, 0);
+
+ if (shutdown_lock_cleared(lrm_resource)) {
+ // @TODO would be more efficient to abort once after transition done
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Shutdown lock cleared", lrm_resource);
+ // Still process results, so we stop timers and update failcounts
+ }
+ }
+ freeXpathObject(xpathObj);
+
+ /* Process operation updates */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
+ max = numXpathResults(xpathObj);
+ if (max > 0) {
+ int lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
+ const char *node = get_node_id(rsc_op);
+
+ process_graph_event(rsc_op, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+
+ /* Detect deleted (as opposed to replaced or added) actions, e.g. from
+ * crm_resource -C */
+ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
+ max = numXpathResults(xpathObj);
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *op_id = NULL;
+ xmlXPathObject *op_match = NULL;
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if(match == NULL) { continue; };
+
+ op_id = ID(match);
+
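+ /* Build the deleted op's xpath, reusing one GString across iterations:
+ * truncate back to the shared RSC_OP_PREFIX, then append this op's ID
+ */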
+ if (rsc_op_xpath == NULL) {
+ rsc_op_xpath = g_string_new(RSC_OP_PREFIX);
+ } else {
+ g_string_truncate(rsc_op_xpath, sizeof(RSC_OP_PREFIX) - 1);
+ }
+ pcmk__g_strcat(rsc_op_xpath, op_id, "']", NULL);
+
+ op_match = xpath_search(diff, (const char *) rsc_op_xpath->str);
+ if (numXpathResults(op_match) == 0) {
+ /* Prevent false positives by matching cancellations too */
+ const char *node = get_node_id(match);
+ pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node);
+
+ if (cancelled == NULL) {
+ crm_debug("No match for deleted action %s (%s on %s)",
+ (const char *) rsc_op_xpath->str, op_id, node);
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Resource op removal", match);
+ freeXpathObject(op_match);
+ goto bail;
+
+ } else {
+ crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
+ op_id, node, cancelled->id);
+ }
+ }
+
+ freeXpathObject(op_match);
+ }
+
+ bail:
+ freeXpathObject(xpathObj);
+ if (rsc_op_xpath != NULL) {
+ g_string_free(rsc_op_xpath, TRUE);
+ }
+}
+
+static void
+process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
+{
+ for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL;
+ rsc_op = pcmk__xml_next(rsc_op)) {
+ process_graph_event(rsc_op, node);
+ }
+ if (shutdown_lock_cleared(lrm_resource)) {
+ // @TODO would be more efficient to abort once after transition done
+ abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared",
+ lrm_resource);
+ }
+}
+
+static void
+process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
+ const char *op, const char *xpath)
+{
+ xmlNode *rsc = NULL;
+
+ if (xml == NULL) {
+ return;
+ }
+
+ if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) {
+ xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
+ CRM_CHECK(xml != NULL, return);
+ }
+
+ CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return);
+
+ /*
+ * Updates by, or in response to, TE actions will never contain updates
+ * for more than one resource at a time, so an update that touches
+ * multiple resources indicates an LRM history refresh.
+ *
+ * In that case, start a new transition rather than checking each result
+ * individually, which can yield _huge_ speedups in large clusters.
+ *
+ * Unfortunately, we can only do so when there are no pending actions.
+ * Otherwise, we could mistakenly throw away those results here, and
+ * the cluster would stall waiting for them and time out the operation.
+ */
+ if ((controld_globals.transition_graph->pending == 0)
+ && (xml->children != NULL) && (xml->children->next != NULL)) {
+
+ crm_log_xml_trace(change, "lrm-refresh");
+ abort_transition(INFINITY, pcmk__graph_restart, "History refresh",
+ NULL);
+ return;
+ }
+
+ for (rsc = pcmk__xml_first_child(xml); rsc != NULL;
+ rsc = pcmk__xml_next(rsc)) {
+ crm_trace("Processing %s", ID(rsc));
+ process_lrm_resource_diff(rsc, node);
+ }
+}
+
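+/* Extract the node ID from an xpath containing node_state[@id='...'];
+ * e.g. for a hypothetical path like .../node_state[@id='3232238180']/lrm,
+ * this returns a newly allocated copy of "3232238180", or NULL if no
+ * node_state ID is present
+ */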
+static char *extract_node_uuid(const char *xpath)
+{
+ char *mutable_path = strdup(xpath);
+ char *node_uuid = NULL;
+ char *search = NULL;
+ char *match = NULL;
+
+ match = strstr(mutable_path, "node_state[@" XML_ATTR_ID "=\'");
+ if (match == NULL) {
+ free(mutable_path);
+ return NULL;
+ }
+ match += strlen("node_state[@" XML_ATTR_ID "=\'");
+
+ search = strchr(match, '\'');
+ if (search == NULL) {
+ free(mutable_path);
+ return NULL;
+ }
+ search[0] = 0;
+
+ node_uuid = strdup(match);
+ free(mutable_path);
+ return node_uuid;
+}
+
+static void
+abort_unless_down(const char *xpath, const char *op, xmlNode *change,
+ const char *reason)
+{
+ char *node_uuid = NULL;
+ pcmk__graph_action_t *down = NULL;
+
+ if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) {
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ return;
+ }
+
+ node_uuid = extract_node_uuid(xpath);
+ if(node_uuid == NULL) {
+ crm_err("Could not extract node ID from %s", xpath);
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ return;
+ }
+
+ down = match_down_event(node_uuid);
+ if (down == NULL) {
+ crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ } else {
+ crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
+ }
+ free(node_uuid);
+}
+
+static void
+process_op_deletion(const char *xpath, xmlNode *change)
+{
+ char *mutable_key = strdup(xpath);
+ char *key;
+ char *node_uuid;
+
+ // Extract the part of xpath between last pair of single quotes
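+ // (e.g. an xpath ending in lrm_rsc_op[@id='rsc1_monitor_10000'], a
+ // hypothetical ID, yields the operation key "rsc1_monitor_10000")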
+ key = strrchr(mutable_key, '\'');
+ if (key != NULL) {
+ *key = '\0';
+ key = strrchr(mutable_key, '\'');
+ }
+ if (key == NULL) {
+ crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
+ xpath);
+ free(mutable_key);
+ return;
+ }
+ ++key;
+
+ node_uuid = extract_node_uuid(xpath);
+ if (confirm_cancel_action(key, node_uuid) == FALSE) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Resource operation removal", change);
+ }
+ free(mutable_key);
+ free(node_uuid);
+}
+
+static void
+process_delete_diff(const char *xpath, const char *op, xmlNode *change)
+{
+ if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
+ process_op_deletion(xpath, change);
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
+ abort_unless_down(xpath, op, change, "Resource state removal");
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
+ abort_unless_down(xpath, op, change, "Node state removal");
+
+ } else {
+ crm_trace("Ignoring delete of %s", xpath);
+ }
+}
+
+static void
+process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
+
+ process_resource_updates(ID(state), lrm, change, op, xpath);
+}
+
+static void
+process_status_diff(xmlNode *status, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ for (xmlNode *state = pcmk__xml_first_child(status); state != NULL;
+ state = pcmk__xml_next(state)) {
+ process_node_state_diff(state, change, op, xpath);
+ }
+}
+
+static void
+process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
+ xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);
+
+ if (status) {
+ process_status_diff(status, change, op, xpath);
+ }
+ if (config) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Non-status-only change", change);
+ }
+}
+
+static void
+te_update_diff_v2(xmlNode *diff)
+{
+ crm_log_xml_trace(diff, "Patch:Raw");
+
+ for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL;
+ change = pcmk__xml_next(change)) {
+
+ xmlNode *match = NULL;
+ const char *name = NULL;
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+
+ // Possible ops: create, modify, delete, move
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+
+ // Ignore uninteresting updates
+ if (op == NULL) {
+ continue;
+
+ } else if (xpath == NULL) {
+ crm_trace("Ignoring %s change for version field", op);
+ continue;
+
+ } else if ((strcmp(op, "move") == 0)
+ && (strstr(xpath,
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
+ "/" XML_CIB_TAG_RESOURCES) == NULL)) {
+ /* We still need to consider moves within the resources section,
+ * since they affect placement order.
+ */
+ crm_trace("Ignoring move change at %s", xpath);
+ continue;
+ }
+
+ // Find the result of create/modify ops
+ if (strcmp(op, "create") == 0) {
+ match = change->children;
+
+ } else if (strcmp(op, "modify") == 0) {
+ match = first_named_child(change, XML_DIFF_RESULT);
+ if(match) {
+ match = match->children;
+ }
+
+ } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) {
+ crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
+ op, xpath);
+ continue;
+ }
+
+ if (match) {
+ if (match->type == XML_COMMENT_NODE) {
+ crm_trace("Ignoring %s operation for comment at %s", op, xpath);
+ continue;
+ }
+ name = (const char *)match->name;
+ }
+
+ crm_trace("Handling %s operation for %s%s%s",
+ op, (xpath? xpath : "CIB"),
+ (name? " matched by " : ""), (name? name : ""));
+
+ if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Configuration change", change);
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
+ || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute change", change);
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
+ || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS,
+ pcmk__str_none)) {
+ abort_unless_down(xpath, op, change, "Transient attribute change");
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strcmp(op, "delete") == 0) {
+ process_delete_diff(xpath, op, change);
+
+ } else if (name == NULL) {
+ crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
+ op, xpath);
+
+ } else if (strcmp(name, XML_TAG_CIB) == 0) {
+ process_cib_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
+ process_status_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
+ process_node_state_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
+ process_resource_updates(ID(match), match, change, op, xpath);
+
+ } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_resource_updates(local_node, match, change, op, xpath);
+ free(local_node);
+
+ } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_lrm_resource_diff(match, local_node);
+ free(local_node);
+
+ } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_graph_event(match, local_node);
+ free(local_node);
+
+ } else {
+ crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
+ op, xpath, name);
+ }
+ }
+}
+
+void
+te_update_diff(const char *event, xmlNode * msg)
+{
+ xmlNode *diff = NULL;
+ const char *op = NULL;
+ int rc = -EINVAL;
+ int format = 1;
+ int p_add[] = { 0, 0, 0 };
+ int p_del[] = { 0, 0, 0 };
+
+ CRM_CHECK(msg != NULL, return);
+ crm_element_value_int(msg, F_CIB_RC, &rc);
+
+ if (controld_globals.transition_graph == NULL) {
+ crm_trace("No graph");
+ return;
+
+ } else if (rc < pcmk_ok) {
+ crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
+ return;
+
+ } else if (controld_globals.transition_graph->complete
+ && (controld_globals.fsa_state != S_IDLE)
+ && (controld_globals.fsa_state != S_TRANSITION_ENGINE)
+ && (controld_globals.fsa_state != S_POLICY_ENGINE)) {
+ crm_trace("Filter state=%s (complete)",
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+ }
+
+ op = crm_element_value(msg, F_CIB_OPERATION);
+ diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ xml_patch_versions(diff, p_add, p_del);
+ crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
+ p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
+ fsa_state2string(controld_globals.fsa_state));
+
+ crm_element_value_int(diff, "format", &format);
+ switch (format) {
+ case 1:
+ te_update_diff_v1(event, diff);
+ break;
+ case 2:
+ te_update_diff_v2(diff);
+ break;
+ default:
+ crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
+ format);
+ }
+ controld_remove_all_outside_events();
+}
+
+void
+process_te_message(xmlNode * msg, xmlNode * xml_data)
+{
+ const char *value = NULL;
+ xmlXPathObject *xpathObj = NULL;
+ int nmatches = 0;
+
+ CRM_CHECK(msg != NULL, return);
+
+ // Transition requests must specify transition engine as subsystem
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ if (pcmk__str_empty(value)
+ || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ crm_info("Received invalid transition request: subsystem '%s' not '"
+ CRM_SYSTEM_TENGINE "'", pcmk__s(value, ""));
+ return;
+ }
+
+ // Only the lrm_invoke command is supported as a transition request
+ value = crm_element_value(msg, F_CRM_TASK);
+ if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) {
+ crm_info("Received invalid transition request: command '%s' not '"
+ CRM_OP_INVOKE_LRM "'", pcmk__s(value, ""));
+ return;
+ }
+
+ // Transition requests must be marked as coming from the executor
+ value = crm_element_value(msg, F_CRM_SYS_FROM);
+ if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+ crm_info("Received invalid transition request: from '%s' not '"
+ CRM_SYSTEM_LRMD "'", pcmk__s(value, ""));
+ return;
+ }
+
+ crm_debug("Processing transition request with ref='%s' origin='%s'",
+ pcmk__s(crm_element_value(msg, F_CRM_REFERENCE), ""),
+ pcmk__s(crm_element_value(msg, F_ORIG), ""));
+
+ xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
+ nmatches = numXpathResults(xpathObj);
+ if (nmatches == 0) {
+ crm_err("Received transition request with no results (bug?)");
+ } else {
+ for (int lpc = 0; lpc < nmatches; lpc++) {
+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
+ const char *node = get_node_id(rsc_op);
+
+ process_graph_event(rsc_op, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+}
+
+void
+cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc < pcmk_ok) {
+ crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
+ }
+}
+
+/*!
+ * \brief Handle a timeout in node-to-node communication
+ *
+ * \param[in,out] data Pointer to graph action
+ *
+ * \return FALSE (indicating that the source should not be re-added)
+ */
+gboolean
+action_timer_callback(gpointer data)
+{
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) data;
+ const char *task = NULL;
+ const char *on_node = NULL;
+ const char *via_node = NULL;
+
+ CRM_CHECK(data != NULL, return FALSE);
+
+ stop_te_timer(action);
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ via_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if (controld_globals.transition_graph->complete) {
+ crm_notice("Node %s did not send %s result (via %s) within %dms "
+ "(ignoring because transition not in progress)",
+ (on_node? on_node : ""), (task? task : "unknown action"),
+ (via_node? via_node : "controller"), action->timeout);
+ } else {
+ /* fail the action */
+
+ crm_err("Node %s did not send %s result (via %s) within %dms "
+ "(action timeout plus cluster-delay)",
+ (on_node? on_node : ""), (task? task : "unknown action"),
+ (via_node? via_node : "controller"),
+ (action->timeout
+ + controld_globals.transition_graph->network_delay));
+ pcmk__log_graph_action(LOG_ERR, action);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+
+ te_action_confirmed(action, controld_globals.transition_graph);
+ abort_transition(INFINITY, pcmk__graph_restart, "Action lost", NULL);
+
+ // Record timeout in the CIB if appropriate
+ if ((action->type == pcmk__rsc_graph_action)
+ && controld_action_is_recordable(task)) {
+ controld_record_action_timeout(action);
+ }
+ }
+
+ return FALSE;
+}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
new file mode 100644
index 0000000..d4e2b0f
--- /dev/null
+++ b/daemons/controld/controld_te_events.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#include <crm/common/attrd_internal.h>
+#include <crm/common/ipc_attrd_internal.h>
+
+/*!
+ * \internal
+ * \brief Action numbers of outside events processed in current update diff
+ *
+ * This table is to be used as a set. It should be empty when the transitioner
+ * begins processing a CIB update diff. It ensures that if there are multiple
+ * events (for example, "_last_0" and "_last_failure_0") for the same action,
+ * only one of them updates the failcount. Events that originate outside the
+ * cluster can't be confirmed, since they're not in the transition graph.
+ */
+static GHashTable *outside_events = NULL;
+
+/*!
+ * \internal
+ * \brief Empty the hash table containing action numbers of outside events
+ */
+void
+controld_remove_all_outside_events(void)
+{
+ if (outside_events != NULL) {
+ g_hash_table_remove_all(outside_events);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Destroy the hash table containing action numbers of outside events
+ */
+void
+controld_destroy_outside_events_table(void)
+{
+ if (outside_events != NULL) {
+ g_hash_table_destroy(outside_events);
+ outside_events = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add an outside event's action number to a set
+ *
+ * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
+ * event was not already in the set, or \p pcmk_rc_already otherwise.
+ */
+static int
+record_outside_event(gint action_num)
+{
+ if (outside_events == NULL) {
+ outside_events = g_hash_table_new(NULL, NULL);
+ }
+
+ if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
+ return pcmk_rc_ok;
+ }
+ return pcmk_rc_already;
+}
+
+gboolean
+fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
+{
+ const char *target_uuid = NULL;
+ const char *router = NULL;
+ const char *router_uuid = NULL;
+ xmlNode *last_action = NULL;
+
+ GList *gIter = NULL;
+ GList *gIter2 = NULL;
+
+ if (graph == NULL || graph->complete) {
+ return FALSE;
+ }
+
+ gIter = graph->synapses;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
+
+ if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
+ /* We've already been here */
+ continue;
+ }
+
+ gIter2 = synapse->actions;
+ for (; gIter2 != NULL; gIter2 = gIter2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
+
+ if ((action->type == pcmk__pseudo_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ continue;
+ } else if (action->type == pcmk__cluster_graph_action) {
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+
+ if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+ continue;
+ }
+ }
+
+ target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router) {
+ crm_node_t *node = crm_get_peer(0, router);
+ if (node) {
+ router_uuid = node->uuid;
+ }
+ }
+
+ if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
+ last_action = action->xml;
+ stop_te_timer(action);
+ pcmk__update_graph(graph, action);
+
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ crm_notice("Action %d (%s) was pending on %s (offline)",
+ action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
+ } else {
+ crm_info("Action %d (%s) is scheduled for %s (offline)",
+ action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
+ }
+ }
+ }
+ }
+
+ if (last_action != NULL) {
+ crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
+ abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
+ last_action);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Update failure-related node attributes if warranted
+ *
+ * \param[in] event XML describing operation that (maybe) failed
+ * \param[in] event_node_uuid Node that event occurred on
+ * \param[in] rc Actual operation return code
+ * \param[in] target_rc Expected operation return code
+ * \param[in] do_update If TRUE, do update regardless of operation type
+ * \param[in] ignore_failures If TRUE, update last failure but not fail count
+ *
+ * \return TRUE if this was not a direct nack, a success, or an LRM status refresh
+ */
+static gboolean
+update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
+ int target_rc, gboolean do_update, gboolean ignore_failures)
+{
+ guint interval_ms = 0;
+
+ char *task = NULL;
+ char *rsc_id = NULL;
+
+ const char *value = NULL;
+ const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
+ const char *on_uname = crm_peer_uname(event_node_uuid);
+ const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
+
+ // Nothing needs to be done for success or status refresh
+ if (rc == target_rc) {
+ return FALSE;
+ } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
+ crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
+ id, rc, on_uname);
+ return FALSE;
+ }
+
+ /* Sanity check */
+ CRM_CHECK(on_uname != NULL, return TRUE);
+ CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
+ crm_err("Couldn't parse: %s", ID(event)); goto bail);
+
+ /* Decide whether update is necessary and what value to use */
+ if ((interval_ms > 0)
+ || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none)
+ || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) {
+ do_update = TRUE;
+
+ } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) {
+ do_update = TRUE;
+ value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
+ CRM_INFINITY_S);
+
+ } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) {
+ do_update = TRUE;
+ value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
+ CRM_INFINITY_S);
+ }
+
+ if (do_update) {
+ pcmk__attrd_query_pair_t *fail_pair = NULL;
+ pcmk__attrd_query_pair_t *last_pair = NULL;
+ char *fail_name = NULL;
+ char *last_name = NULL;
+ GList *attrs = NULL;
+
+ uint32_t opts = pcmk__node_attr_none;
+
+ char *now = pcmk__ttoa(time(NULL));
+
+ // Fail count will be either incremented or set to infinity
+ if (!pcmk_str_is_infinity(value)) {
+ value = XML_NVPAIR_ATTR_VALUE "++";
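+ // (the attribute manager expands "value++" to the current value plus 1)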
+ }
+
+ if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
+ opts |= pcmk__node_attr_remote;
+ }
+
+ crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
+ (ignore_failures? "last failure" : "failcount"),
+ rsc_id, on_uname, task, rc, value, now);
+
+ /* Update the fail count, if we're not ignoring failures */
+ if (!ignore_failures) {
+ fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
+ CRM_ASSERT(fail_pair != NULL);
+
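+ // The attribute name is of the form "fail-count-<rsc>#<op>_<interval-ms>"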
+ fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
+ fail_pair->name = fail_name;
+ fail_pair->value = value;
+ fail_pair->node = on_uname;
+
+ attrs = g_list_prepend(attrs, fail_pair);
+ }
+
+ /* Update the last failure time (even if we're ignoring failures,
+ * so that failure can still be detected and shown, e.g. by crm_mon)
+ */
+ last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
+ CRM_ASSERT(last_pair != NULL);
+
+ last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
+ last_pair->name = last_name;
+ last_pair->value = now;
+ last_pair->node = on_uname;
+
+ attrs = g_list_prepend(attrs, last_pair);
+
+ update_attrd_list(attrs, opts);
+
+ free(fail_name);
+ free(fail_pair);
+
+ free(last_name);
+ free(last_pair);
+ g_list_free(attrs);
+
+ free(now);
+ }
+
+ bail:
+ free(rsc_id);
+ free(task);
+ return TRUE;
+}
+
+pcmk__graph_action_t *
+controld_get_action(int id)
+{
+ for (GList *item = controld_globals.transition_graph->synapses;
+ item != NULL; item = item->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
+
+ for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
+
+ if (action->id == id) {
+ return action;
+ }
+ }
+ }
+ return NULL;
+}
+
+pcmk__graph_action_t *
+get_cancel_action(const char *id, const char *node)
+{
+ GList *gIter = NULL;
+ GList *gIter2 = NULL;
+
+ gIter = controld_globals.transition_graph->synapses;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
+
+ gIter2 = synapse->actions;
+ for (; gIter2 != NULL; gIter2 = gIter2->next) {
+ const char *task = NULL;
+ const char *target = NULL;
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
+ continue;
+ }
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
+ crm_trace("Wrong key %s for %s on %s", task, id, node);
+ continue;
+ }
+
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
+ crm_trace("Wrong node %s for %s on %s", target, id, node);
+ continue;
+ }
+
+ crm_trace("Found %s on %s", id, node);
+ return action;
+ }
+ }
+
+ return NULL;
+}
+
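+/*!
+ * \brief Mark a cancel action as confirmed, stopping its timer
+ *
+ * \param[in] id       Operation key of the action being cancelled
+ * \param[in] node_id  UUID of the node the action was cancelled on
+ *
+ * \return TRUE if a matching cancel action was found and confirmed,
+ *         FALSE otherwise
+ */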
+bool
+confirm_cancel_action(const char *id, const char *node_id)
+{
+ const char *op_key = NULL;
+ const char *node_name = NULL;
+ pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
+
+ if (cancel == NULL) {
+ return FALSE;
+ }
+ op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
+ node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
+
+ stop_te_timer(cancel);
+ te_action_confirmed(cancel, controld_globals.transition_graph);
+
+ crm_info("Cancellation of %s on %s confirmed (action %d)",
+ op_key, node_name, cancel->id);
+ return TRUE;
+}
+
+/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
+#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
+ "/" XML_CIB_TAG_NODE "[@" XML_ATTR_ID "='%s']"
+
+/*!
+ * \brief Find a transition event that would have made a specified node down
+ *
+ * \param[in] target UUID of node to match
+ *
+ * \return Matching event if found, NULL otherwise
+ */
+pcmk__graph_action_t *
+match_down_event(const char *target)
+{
+ pcmk__graph_action_t *match = NULL;
+ xmlXPathObjectPtr xpath_ret = NULL;
+ GList *gIter, *gIter2;
+
+ char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
+
+ for (gIter = controld_globals.transition_graph->synapses;
+ gIter != NULL && match == NULL;
+ gIter = gIter->next) {
+
+ for (gIter2 = ((pcmk__graph_synapse_t *) gIter->data)->actions;
+ gIter2 != NULL && match == NULL;
+ gIter2 = gIter2->next) {
+
+ match = (pcmk__graph_action_t *) gIter2->data;
+ if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
+ xpath_ret = xpath_search(match->xml, xpath);
+ if (numXpathResults(xpath_ret) < 1) {
+ match = NULL;
+ }
+ freeXpathObject(xpath_ret);
+ } else {
+ // Only actions that were actually started can match
+ match = NULL;
+ }
+ }
+ }
+
+ free(xpath);
+
+ if (match != NULL) {
+ crm_debug("Shutdown action %d (%s) found for node %s", match->id,
+ crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
+ } else {
+ crm_debug("No reason to expect node %s to be down", target);
+ }
+ return match;
+}
+
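+/*!
+ * \brief Process an action result, aborting or advancing the transition
+ *
+ * \param[in] event       XML of action result (an lrm_rsc_op entry)
+ * \param[in] event_node  UUID of node where the action was executed
+ */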
+void
+process_graph_event(xmlNode *event, const char *event_node)
+{
+ int rc = -1; // Actual result
+ int target_rc = -1; // Expected result
+ int status = -1; // Executor status
+ int callid = -1; // Executor call ID
+ int transition_num = -1; // Transition number
+ int action_num = -1; // Action number within transition
+ char *update_te_uuid = NULL;
+ bool ignore_failures = FALSE;
+ const char *id = NULL;
+ const char *desc = NULL;
+ const char *magic = NULL;
+ const char *uname = NULL;
+
+ CRM_ASSERT(event != NULL);
+
+/*
+<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
+*/
+
+ magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
+ if (magic == NULL) {
+ /* Not an action result (no transition key); nothing to process */
+ return;
+ }
+
+ crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
+ if (status == PCMK_EXEC_PENDING) {
+ return;
+ }
+
+ id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
+ crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
+ crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
+
+ rc = pcmk__effective_rc(rc);
+
+ if (decode_transition_key(magic, &update_te_uuid, &transition_num,
+ &action_num, &target_rc) == FALSE) {
+ // decode_transition_key() already logged the bad key
+ crm_err("Can't process action %s result: Incompatible versions? "
+ CRM_XS " call-id=%d", id, callid);
+ abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event);
+ return;
+ }
+
+ if (transition_num == -1) {
+ // E.g. crm_resource --fail
+ if (record_outside_event(action_num) != pcmk_rc_ok) {
+ crm_debug("Outside event with transition key '%s' has already been "
+ "processed", magic);
+ goto bail;
+ }
+ desc = "initiated outside of the cluster";
+ abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event",
+ event);
+
+ } else if ((action_num < 0)
+ || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
+ pcmk__str_none)) {
+ desc = "initiated by a different DC";
+ abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event);
+
+ } else if ((controld_globals.transition_graph->id != transition_num)
+ || controld_globals.transition_graph->complete) {
+
+ // Action is not from currently active transition
+
+ guint interval_ms = 0;
+
+ if (parse_op_key(id, NULL, NULL, &interval_ms)
+ && (interval_ms != 0)) {
+ /* Recurring actions have the transition number they were first
+ * scheduled in.
+ */
+
+ if (status == PCMK_EXEC_CANCELLED) {
+ confirm_cancel_action(id, get_node_id(event));
+ goto bail;
+ }
+
+ desc = "arrived after initial scheduling";
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Change in recurring result", event);
+
+ } else if (controld_globals.transition_graph->id != transition_num) {
+ desc = "arrived really late";
+ abort_transition(INFINITY, pcmk__graph_restart, "Old event", event);
+ } else {
+ desc = "arrived late";
+ abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph",
+ event);
+ }
+
+ } else {
+ // Event is result of an action from currently active transition
+ pcmk__graph_action_t *action = controld_get_action(action_num);
+
+ if (action == NULL) {
+ // Should never happen
+ desc = "unknown";
+ abort_transition(INFINITY, pcmk__graph_restart, "Unknown event",
+ event);
+
+ } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ /* Nothing further needs to be done if the action has already been
+ * confirmed. This can happen e.g. when processing an "xxx_last_0" or
+ * "xxx_last_failure_0" record as well as the main history record,
+ * which would otherwise result in incorrectly bumping the fail count
+ * twice.
+ */
+ crm_log_xml_debug(event, "Event already confirmed:");
+ goto bail;
+
+ } else {
+ /* An action result needs to be confirmed.
+ * (This is the only case where desc == NULL.)
+ */
+
+ if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
+ ignore_failures = TRUE;
+
+ } else if (rc != target_rc) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ }
+
+ stop_te_timer(action);
+ te_action_confirmed(action, controld_globals.transition_graph);
+
+ if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
+ abort_transition(action->synapse->priority + 1,
+ pcmk__graph_restart, "Event failed", event);
+ }
+ }
+ }
+
+ if (id == NULL) {
+ id = "unknown action";
+ }
+ uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
+ if (uname == NULL) {
+ uname = "unknown node";
+ }
+
+ if (status == PCMK_EXEC_INVALID) {
+ // We couldn't attempt the action
+ crm_info("Transition %d action %d (%s on %s): %s",
+ transition_num, action_num, id, uname,
+ pcmk_exec_status_str(status));
+
+ } else if (desc && update_failcount(event, event_node, rc, target_rc,
+ (transition_num == -1), FALSE)) {
+ crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
+ CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(target_rc),
+ services_ocf_exitcode_str(rc),
+ target_rc, rc, callid, desc);
+
+ } else if (desc) {
+ crm_info("Transition %d action %d (%s on %s): %s "
+ CRM_XS " rc=%d target-rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ desc, rc, target_rc, callid);
+
+ } else if (rc == target_rc) {
+ crm_info("Transition %d action %d (%s on %s) confirmed: %s "
+ CRM_XS " rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(rc), rc, callid);
+
+ } else {
+ update_failcount(event, event_node, rc, target_rc,
+ (transition_num == -1), ignore_failures);
+ crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
+ CRM_XS " target-rc=%d rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(target_rc),
+ services_ocf_exitcode_str(rc),
+ target_rc, rc, callid);
+ }
+
+ bail:
+ free(update_te_uuid);
+}
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
new file mode 100644
index 0000000..ecbc0b2
--- /dev/null
+++ b/daemons/controld/controld_te_utils.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+//! Triggers transition graph processing
+static crm_trigger_t *transition_trigger = NULL;
+
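+/*!
+ * \brief Stop a graph action's timer if it is running
+ *
+ * \param[in,out] action  Action whose timer should be stopped
+ *
+ * \return TRUE if the timer was running and is now stopped, FALSE otherwise
+ */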
+gboolean
+stop_te_timer(pcmk__graph_action_t *action)
+{
+ if (action == NULL) {
+ return FALSE;
+ }
+ if (action->timer != 0) {
+ crm_trace("Stopping action timer");
+ g_source_remove(action->timer);
+ action->timer = 0;
+ } else {
+ crm_trace("Action timer was already stopped");
+ return FALSE;
+ }
+ return TRUE;
+}
+
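+/*!
+ * \internal
+ * \brief Execute the transition graph (trigger callback)
+ *
+ * \param[in] user_data  Ignored
+ *
+ * \return Always TRUE, to keep the trigger registered
+ */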
+static gboolean
+te_graph_trigger(gpointer user_data)
+{
+ if (controld_globals.transition_graph == NULL) {
+ crm_debug("Nothing to do");
+ return TRUE;
+ }
+
+ crm_trace("Invoking graph %d in state %s",
+ controld_globals.transition_graph->id,
+ fsa_state2string(controld_globals.fsa_state));
+
+ switch (controld_globals.fsa_state) {
+ case S_STARTING:
+ case S_PENDING:
+ case S_NOT_DC:
+ case S_HALT:
+ case S_ILLEGAL:
+ case S_STOPPING:
+ case S_TERMINATE:
+ return TRUE;
+ default:
+ break;
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ enum pcmk__graph_status graph_rc;
+ int orig_limit = controld_globals.transition_graph->batch_limit;
+ int throttled_limit = throttle_get_total_job_limit(orig_limit);
+
+ controld_globals.transition_graph->batch_limit = throttled_limit;
+ graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph->batch_limit = orig_limit;
+
+ if (graph_rc == pcmk__graph_active) {
+ crm_trace("Transition not yet complete");
+ return TRUE;
+
+ } else if (graph_rc == pcmk__graph_pending) {
+ crm_trace("Transition not yet complete - no actions fired");
+ return TRUE;
+ }
+
+ if (graph_rc != pcmk__graph_complete) {
+ crm_warn("Transition failed: %s",
+ pcmk__graph_status2text(graph_rc));
+ pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
+ }
+ }
+
+ crm_debug("Transition %d is now complete",
+ controld_globals.transition_graph->id);
+ controld_globals.transition_graph->complete = true;
+ notify_crmd(controld_globals.transition_graph);
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Initialize transition trigger
+ */
+void
+controld_init_transition_trigger(void)
+{
+ transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
+ NULL);
+}
+
+/*!
+ * \internal
+ * \brief Destroy transition trigger
+ */
+void
+controld_destroy_transition_trigger(void)
+{
+ mainloop_destroy_trigger(transition_trigger);
+ transition_trigger = NULL;
+}
+
+void
+controld_trigger_graph_as(const char *fn, int line)
+{
+ crm_trace("%s:%d - Triggered graph processing", fn, line);
+ mainloop_set_trigger(transition_trigger);
+}
+
+static struct abort_timer_s {
+ bool aborted;
+ guint id;
+ int priority;
+ enum pcmk__graph_next action;
+ const char *text;
+} abort_timer = { 0, };
+
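+// Abort the transition on timer pop, if nothing else has aborted it first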
+static gboolean
+abort_timer_popped(gpointer data)
+{
+ if (AM_I_DC && (abort_timer.aborted == FALSE)) {
+ abort_transition(abort_timer.priority, abort_timer.action,
+ abort_timer.text, NULL);
+ }
+ abort_timer.id = 0;
+ return FALSE; // do not immediately reschedule timer
+}
+
+/*!
+ * \internal
+ * \brief Abort transition after delay, if not already aborted in that time
+ *
+ * \param[in] abort_priority  Priority to abort the transition with
+ * \param[in] abort_action    Completion action to request
+ * \param[in] abort_text      Reason for abort (must be a string literal,
+ *                            since the pointer is stored without copying)
+ * \param[in] delay_ms        Delay before aborting, in milliseconds
+ */
+void
+abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, guint delay_ms)
+{
+ if (abort_timer.id) {
+ // Timer already in progress, stop and reschedule
+ g_source_remove(abort_timer.id);
+ }
+ abort_timer.aborted = FALSE;
+ abort_timer.priority = abort_priority;
+ abort_timer.action = abort_action;
+ abort_timer.text = abort_text;
+ abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
+}
+
+static const char *
+abort2text(enum pcmk__graph_next abort_action)
+{
+ switch (abort_action) {
+ case pcmk__graph_done: return "done";
+ case pcmk__graph_wait: return "stop";
+ case pcmk__graph_restart: return "restart";
+ case pcmk__graph_shutdown: return "shutdown";
+ }
+ return "unknown";
+}
+
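+/*!
+ * \internal
+ * \brief Raise a graph's abort priority and completion action if needed
+ *
+ * \param[in,out] graph         Graph to update
+ * \param[in]     priority      Requested abort priority
+ * \param[in]     action        Requested completion action
+ * \param[in]     abort_reason  Readable description of abort reason
+ *
+ * \return TRUE if either value was changed, FALSE otherwise
+ */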
+static bool
+update_abort_priority(pcmk__graph_t *graph, int priority,
+ enum pcmk__graph_next action, const char *abort_reason)
+{
+ bool change = FALSE;
+
+ if (graph == NULL) {
+ return change;
+ }
+
+ if (graph->abort_priority < priority) {
+ crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
+ graph->abort_priority = priority;
+ if (graph->abort_reason != NULL) {
+ crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
+ }
+ graph->abort_reason = abort_reason;
+ change = TRUE;
+ }
+
+ if (graph->completion_action < action) {
+ crm_debug("Abort action %s superseded by %s: %s",
+ abort2text(graph->completion_action), abort2text(action), abort_reason);
+ graph->completion_action = action;
+ change = TRUE;
+ }
+
+ return change;
+}
+
+void
+abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, const xmlNode *reason,
+ const char *fn, int line)
+{
+ int add[] = { 0, 0, 0 };
+ int del[] = { 0, 0, 0 };
+ int level = LOG_INFO;
+ const xmlNode *diff = NULL;
+ const xmlNode *change = NULL;
+
+ CRM_CHECK(controld_globals.transition_graph != NULL, return);
+
+ switch (controld_globals.fsa_state) {
+ case S_STARTING:
+ case S_PENDING:
+ case S_NOT_DC:
+ case S_HALT:
+ case S_ILLEGAL:
+ case S_STOPPING:
+ case S_TERMINATE:
+ crm_info("Abort %s suppressed: state=%s (%scomplete)",
+ abort_text, fsa_state2string(controld_globals.fsa_state),
+ (controld_globals.transition_graph->complete? "" : "in"));
+ return;
+ default:
+ break;
+ }
+
+ abort_timer.aborted = TRUE;
+ controld_expect_sched_reply(NULL);
+
+ if (!controld_globals.transition_graph->complete
+ && update_abort_priority(controld_globals.transition_graph,
+ abort_priority, abort_action,
+ abort_text)) {
+ level = LOG_NOTICE;
+ }
+
+ if (reason != NULL) {
+ const xmlNode *search = NULL;
+
+ for(search = reason; search; search = search->parent) {
+ if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) {
+ diff = search;
+ break;
+ }
+ }
+
+ if(diff) {
+ xml_patch_versions(diff, add, del);
+ for(search = reason; search; search = search->parent) {
+ if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) {
+ change = search;
+ break;
+ }
+ }
+ }
+ }
+
+ if (reason == NULL) {
+ do_crm_log(level,
+ "Transition %d aborted: %s " CRM_XS " source=%s:%d "
+ "complete=%s", controld_globals.transition_graph->id,
+ abort_text, fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if(change == NULL) {
+ GString *local_path = pcmk__element_xpath(reason);
+ CRM_ASSERT(local_path != NULL);
+
+ do_crm_log(level, "Transition %d aborted by %s.%s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id, TYPE(reason),
+ ID(reason), abort_text, add[0], add[1], add[2], fn, line,
+ (const char *) local_path->str,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+ g_string_free(local_path, TRUE);
+
+ } else {
+ const char *kind = NULL;
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *path = crm_element_value(change, XML_DIFF_PATH);
+
+ if(change == reason) {
+ if(strcmp(op, "create") == 0) {
+ reason = reason->children;
+
+ } else if(strcmp(op, "modify") == 0) {
+ reason = first_named_child(reason, XML_DIFF_RESULT);
+ if(reason) {
+ reason = reason->children;
+ }
+ }
+ }
+
+ kind = TYPE(reason);
+ if(strcmp(op, "delete") == 0) {
+ const char *shortpath = strrchr(path, '/');
+
+ do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ (shortpath? (shortpath + 1) : path), abort_text,
+ add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_none)) {
+ do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ crm_element_value(reason, XML_ATTR_ID), op,
+ crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
+ crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
+ abort_text, add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_none)) {
+ const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
+
+ do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
+ CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
+ controld_globals.transition_graph->id,
+ crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
+ crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
+ magic, add[0], add[1], add[2], fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) {
+ const char *uname = crm_peer_uname(ID(reason));
+
+ do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
+ controld_globals.transition_graph->id,
+ kind, op, (uname? uname : ID(reason)), abort_text,
+ add[0], add[1], add[2], fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else {
+ const char *id = ID(reason);
+
+ do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ TYPE(reason), (id? id : ""), (op? op : "change"),
+ abort_text, add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+ }
+ }
+
+ if (controld_globals.transition_graph->complete) {
+ if (controld_get_period_transition_timer() > 0) {
+ controld_stop_transition_timer();
+ controld_start_transition_timer();
+ } else {
+ register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
+ }
+ return;
+ }
+
+ trigger_graph();
+}
diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c
new file mode 100644
index 0000000..5b7f9c0
--- /dev/null
+++ b/daemons/controld/controld_throttle.c
@@ -0,0 +1,574 @@
+/*
+ * Copyright 2013-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <ctype.h>
+#include <dirent.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+/* These values don't need to be bits, but these particular values must be kept
+ * for backward compatibility during rolling upgrades.
+ */
+enum throttle_state_e {
+ throttle_none = 0x0000,
+ throttle_low = 0x0001,
+ throttle_med = 0x0010,
+ throttle_high = 0x0100,
+ throttle_extreme = 0x1000,
+};
+
+struct throttle_record_s {
+ int max;
+ enum throttle_state_e mode;
+ char *node;
+};
+
+static int throttle_job_max = 0;
+static float throttle_load_target = 0.0;
+
+#define THROTTLE_FACTOR_LOW 1.2
+#define THROTTLE_FACTOR_MEDIUM 1.6
+#define THROTTLE_FACTOR_HIGH 2.0
+
+static GHashTable *throttle_records = NULL;
+static mainloop_timer_t *throttle_timer = NULL;
+
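+// Map a throttle mode to a human-readable adjective for logging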
+static const char *
+load2str(enum throttle_state_e mode)
+{
+ switch (mode) {
+ case throttle_extreme: return "extreme";
+ case throttle_high: return "high";
+ case throttle_med: return "medium";
+ case throttle_low: return "low";
+ case throttle_none: return "negligible";
+ default: return "undetermined";
+ }
+}
+
+#if HAVE_LINUX_PROCFS
+/*!
+ * \internal
+ * \brief Return name of /proc file containing the CIB daemon's load statistics
+ *
+ * \return Newly allocated memory with file name on success, NULL otherwise
+ *
+ * \note It is the caller's responsibility to free the return value.
+ * This will return NULL if the daemon is being run via valgrind.
+ * This should be called only on Linux systems.
+ */
+static char *
+find_cib_loadfile(void)
+{
+ pid_t pid = pcmk__procfs_pid_of("pacemaker-based");
+
+ return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL;
+}
+
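+/*!
+ * \internal
+ * \brief Calculate the CIB daemon's CPU usage since the last call
+ *
+ * \param[out] load  Where to store usage as a fraction of one core
+ *
+ * \return TRUE if the CIB daemon's stat file could be read, FALSE otherwise
+ */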
+static bool
+throttle_cib_load(float *load)
+{
+/*
+ /proc/[pid]/stat
+ Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c.
+
+ The fields, in order, with their proper scanf(3) format specifiers, are:
+
+ pid %d (1) The process ID.
+
+ comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out.
+
+ state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging.
+
+ ppid %d (4) The PID of the parent.
+
+ pgrp %d (5) The process group ID of the process.
+
+ session %d (6) The session ID of the process.
+
+ tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.)
+
+ tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process.
+
+ flags %u (%lu before Linux 2.6.22)
+ (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version.
+
+ minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk.
+
+ cminflt %lu (11) The number of minor faults that the process's waited-for children have made.
+
+ majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk.
+
+ cmajflt %lu (13) The number of major faults that the process's waited-for children have made.
+
+ utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations.
+
+ stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
+ */
+
+ static char *loadfile = NULL;
+ static time_t last_call = 0;
+ static long ticks_per_s = 0;
+ static unsigned long last_utime, last_stime;
+
+ char buffer[64*1024];
+ FILE *stream = NULL;
+ time_t now = time(NULL);
+
+ if(load == NULL) {
+ return FALSE;
+ } else {
+ *load = 0.0;
+ }
+
+ if(loadfile == NULL) {
+ last_call = 0;
+ last_utime = 0;
+ last_stime = 0;
+ loadfile = find_cib_loadfile();
+ if (loadfile == NULL) {
+ crm_warn("Couldn't find CIB load file");
+ return FALSE;
+ }
+ ticks_per_s = sysconf(_SC_CLK_TCK);
+ crm_trace("Found %s", loadfile);
+ }
+
+ stream = fopen(loadfile, "r");
+ if(stream == NULL) {
+ int rc = errno;
+
+ crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
+ free(loadfile); loadfile = NULL;
+ return FALSE;
+ }
+
+ if(fgets(buffer, sizeof(buffer), stream)) {
+ char *comm = calloc(1, 256);
+ char state = 0;
+ int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
+ unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
+
+ rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
+ &pid, comm, &state,
+ &ppid, &pgrp, &session, &tty_nr, &tpgid,
+ &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
+ free(comm);
+
+ if(rc != 15) {
+ crm_err("Only %d of 15 fields found in %s", rc, loadfile);
+ fclose(stream);
+ return FALSE;
+
+ } else if(last_call > 0
+ && last_call < now
+ && last_utime <= utime
+ && last_stime <= stime) {
+
+ time_t elapsed = now - last_call;
+ unsigned long delta_utime = utime - last_utime;
+ unsigned long delta_stime = stime - last_stime;
+
+ *load = (delta_utime + delta_stime); /* Implicitly converted to float before division */
+ *load /= ticks_per_s;
+ *load /= elapsed;
+ crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);
+
+ } else {
+ crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
+ }
+
+ last_call = now;
+ last_utime = utime;
+ last_stime = stime;
+
+ fclose(stream);
+ return TRUE;
+ }
+
+ fclose(stream);
+ return FALSE;
+}
+
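+/*!
+ * \internal
+ * \brief Get the current 1-minute system load average
+ *
+ * \param[out] load  Where to store the load average
+ *
+ * \return TRUE if the load average could be read, FALSE otherwise
+ */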
+static bool
+throttle_load_avg(float *load)
+{
+ char buffer[256];
+ FILE *stream = NULL;
+ const char *loadfile = "/proc/loadavg";
+
+ if(load == NULL) {
+ return FALSE;
+ }
+
+ stream = fopen(loadfile, "r");
+ if(stream == NULL) {
+ int rc = errno;
+ crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
+ return FALSE;
+ }
+
+ if(fgets(buffer, sizeof(buffer), stream)) {
+ char *nl = strstr(buffer, "\n");
+
+ /* Grab the 1-minute average, ignore the rest */
+ *load = strtof(buffer, NULL);
+ if(nl) { nl[0] = 0; }
+
+ fclose(stream);
+ return TRUE;
+ }
+
+ fclose(stream);
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Check a load value against throttling thresholds
+ *
+ * \param[in] load Load value to check
+ * \param[in] desc Description of metric (for logging)
+ * \param[in] thresholds Low/medium/high/extreme thresholds
+ *
+ * \return Throttle mode corresponding to load value
+ */
+static enum throttle_state_e
+throttle_check_thresholds(float load, const char *desc,
+ const float thresholds[4])
+{
+ if (load > thresholds[3]) {
+ crm_notice("Extreme %s detected: %f", desc, load);
+ return throttle_extreme;
+
+ } else if (load > thresholds[2]) {
+ crm_notice("High %s detected: %f", desc, load);
+ return throttle_high;
+
+ } else if (load > thresholds[1]) {
+ crm_info("Moderate %s detected: %f", desc, load);
+ return throttle_med;
+
+ } else if (load > thresholds[0]) {
+ crm_debug("Noticeable %s detected: %f", desc, load);
+ return throttle_low;
+ }
+
+ crm_trace("Negligible %s detected: %f", desc, load);
+ return throttle_none;
+}
+
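+/*!
+ * \internal
+ * \brief Check a load value, normalized per core, against the load target
+ *
+ * \param[in] load   Load value to check
+ * \param[in] desc   Description of metric (for logging)
+ * \param[in] cores  Number of CPU cores detected on this node
+ *
+ * \return Throttle mode corresponding to load value (never
+ *         throttle_extreme for this metric)
+ */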
+static enum throttle_state_e
+throttle_handle_load(float load, const char *desc, int cores)
+{
+ float normalize;
+ float thresholds[4];
+
+ if (cores == 1) {
+ /* On a single core machine, a load of 1.0 is already too high */
+ normalize = 0.6;
+
+ } else {
+ /* Normalize the load to be per-core */
+ normalize = cores;
+ }
+ thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
+ thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
+ thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
+ thresholds[3] = load + 1.0; /* never extreme */
+
+ return throttle_check_thresholds(load, desc, thresholds);
+}
+#endif // HAVE_LINUX_PROCFS
+
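+/*!
+ * \internal
+ * \brief Determine the current throttle mode for the local node
+ *
+ * \return Throttle mode based on CIB daemon CPU usage and system load
+ *         average (throttle_none where /proc is unavailable)
+ */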
+static enum throttle_state_e
+throttle_mode(void)
+{
+ enum throttle_state_e mode = throttle_none;
+
+#if HAVE_LINUX_PROCFS
+ unsigned int cores;
+ float load;
+ float thresholds[4];
+
+ cores = pcmk__procfs_num_cores();
+ if(throttle_cib_load(&load)) {
+ float cib_max_cpu = 0.95;
+
+ /* The CIB is a single-threaded task and thus cannot consume
+ * more than 100% of a CPU (and 1/cores of the overall system
+ * load).
+ *
+ * On a many-cored system, the CIB might therefore be maxed out
+ * (causing operations to fail or appear to fail) even though
+ * the overall system load is still reasonable.
+ *
+ * Therefore, the 'normal' thresholds cannot apply here, and we
+ * need a special case.
+ */
+ if(cores == 1) {
+ cib_max_cpu = 0.4;
+ }
+ if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
+ cib_max_cpu = throttle_load_target;
+ }
+
+ thresholds[0] = cib_max_cpu * 0.8;
+ thresholds[1] = cib_max_cpu * 0.9;
+ thresholds[2] = cib_max_cpu;
+ /* Can only happen on machines with a low number of cores */
+ thresholds[3] = cib_max_cpu * 1.5;
+
+ mode = throttle_check_thresholds(load, "CIB load", thresholds);
+ }
+
+ if(throttle_load_target <= 0) {
+ /* A non-positive load target disables system load checks; if that ever
+ * becomes a valid setting, the cluster will at least behave as expected
+ */
+ return mode;
+ }
+
+ if(throttle_load_avg(&load)) {
+ enum throttle_state_e cpu_load;
+
+ cpu_load = throttle_handle_load(load, "CPU load", cores);
+ if (cpu_load > mode) {
+ mode = cpu_load;
+ }
+ crm_debug("Current load is %f across %u core(s)", load, cores);
+ }
+#endif // HAVE_LINUX_PROCFS
+ return mode;
+}
+
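+/*!
+ * \internal
+ * \brief Broadcast the local throttle mode to all cluster nodes
+ *
+ * \param[in] mode  New throttle mode (a message is sent only on change)
+ */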
+static void
+throttle_send_command(enum throttle_state_e mode)
+{
+ xmlNode *xml = NULL;
+ static enum throttle_state_e last = -1;
+
+ if(mode != last) {
+ crm_info("New throttle mode: %s load (was %s)",
+ load2str(mode), load2str(last));
+ last = mode;
+
+ xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
+ crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
+
+ send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
+ free_xml(xml);
+ }
+}
+
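+// Periodically recompute and broadcast the local throttle mode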
+static gboolean
+throttle_timer_cb(gpointer data)
+{
+ throttle_send_command(throttle_mode());
+ return TRUE;
+}
+
+static void
+throttle_record_free(gpointer p)
+{
+ struct throttle_record_s *r = p;
+ free(r->node);
+ free(r);
+}
+
+static void
+throttle_set_load_target(float target)
+{
+ throttle_load_target = target;
+}
+
+/*!
+ * \internal
+ * \brief Update the maximum number of simultaneous jobs
+ *
+ * \param[in] preference Cluster-wide node-action-limit from the CIB
+ */
+static void
+throttle_update_job_max(const char *preference)
+{
+ long long max = 0LL;
+ const char *env_limit = getenv("PCMK_node_action_limit");
+
+ if (env_limit != NULL) {
+ preference = env_limit; // Per-node override
+ }
+ if (preference != NULL) {
+ pcmk__scan_ll(preference, &max, 0LL);
+ }
+ if (max > 0) {
+ throttle_job_max = (int) max;
+ } else {
+ // Default is based on the number of cores detected
+ throttle_job_max = 2 * pcmk__procfs_num_cores();
+ }
+}
+
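+/*!
+ * \internal
+ * \brief Initialize throttle bookkeeping and start periodic load checks
+ */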
+void
+throttle_init(void)
+{
+ if(throttle_records == NULL) {
+ throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
+ throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
+ }
+
+ throttle_update_job_max(NULL);
+ mainloop_timer_start(throttle_timer);
+}
+
+/*!
+ * \internal
+ * \brief Configure throttle options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_throttle(GHashTable *options)
+{
+ const char *value = g_hash_table_lookup(options, "load-threshold");
+
+ if (value != NULL) {
+ throttle_set_load_target(strtof(value, NULL) / 100.0);
+ }
+
+ value = g_hash_table_lookup(options, "node-action-limit");
+ throttle_update_job_max(value);
+}
+
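+/*!
+ * \internal
+ * \brief Stop the load check timer and free all throttle data
+ */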
+void
+throttle_fini(void)
+{
+ if (throttle_timer != NULL) {
+ mainloop_timer_del(throttle_timer);
+ throttle_timer = NULL;
+ }
+ if (throttle_records != NULL) {
+ g_hash_table_destroy(throttle_records);
+ throttle_records = NULL;
+ }
+}
+
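+/*!
+ * \internal
+ * \brief Get the cluster-wide job limit, adjusted for peer load
+ *
+ * \param[in] l  Configured batch-limit (0 means unlimited)
+ *
+ * \return \p l, lowered to a fraction of the active peer count if any
+ *         peer reports high or extreme load
+ */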
+int
+throttle_get_total_job_limit(int l)
+{
+ /* Cluster-wide limit */
+ GHashTableIter iter;
+ int limit = l;
+ int peers = crm_active_peers();
+ struct throttle_record_s *r = NULL;
+
+ g_hash_table_iter_init(&iter, throttle_records);
+
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
+ switch(r->mode) {
+
+ case throttle_extreme:
+ if(limit == 0 || limit > peers/4) {
+ limit = QB_MAX(1, peers/4);
+ }
+ break;
+
+ case throttle_high:
+ if(limit == 0 || limit > peers/2) {
+ limit = QB_MAX(1, peers/2);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ if(limit == l) {
+ /* crm_trace("No change to batch-limit=%d", limit); */
+
+ } else if(l == 0) {
+ crm_trace("Using batch-limit=%d", limit);
+
+ } else {
+ crm_trace("Using batch-limit=%d instead of %d", limit, l);
+ }
+ return limit;
+}
+
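+/*!
+ * \internal
+ * \brief Get the maximum number of simultaneous jobs allowed on a node
+ *
+ * \param[in] node  Name of node to check
+ *
+ * \return Job limit based on the node's reported throttle mode and job
+ *         maximum (local defaults are assumed for unknown nodes)
+ */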
+int
+throttle_get_job_limit(const char *node)
+{
+ int jobs = 1;
+ struct throttle_record_s *r = NULL;
+
+ r = g_hash_table_lookup(throttle_records, node);
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct throttle_record_s));
+ r->node = strdup(node);
+ r->mode = throttle_low;
+ r->max = throttle_job_max;
+ crm_trace("Defaulting to local values for unknown node %s", node);
+
+ g_hash_table_insert(throttle_records, r->node, r);
+ }
+
+ switch(r->mode) {
+ case throttle_extreme:
+ case throttle_high:
+ jobs = 1; /* At least one job must always be allowed */
+ break;
+ case throttle_med:
+ jobs = QB_MAX(1, r->max / 4);
+ break;
+ case throttle_low:
+ jobs = QB_MAX(1, r->max / 2);
+ break;
+ case throttle_none:
+ jobs = QB_MAX(1, r->max);
+ break;
+ default:
+ crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
+ break;
+ }
+ return jobs;
+}
+
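+/*!
+ * \internal
+ * \brief Update a peer's throttle record from a received cluster message
+ *
+ * \param[in] xml  XML of CRM_OP_THROTTLE message
+ */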
+void
+throttle_update(xmlNode *xml)
+{
+ int max = 0;
+ int mode = 0;
+ struct throttle_record_s *r = NULL;
+ const char *from = crm_element_value(xml, F_CRM_HOST_FROM);
+
+ crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode);
+ crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max);
+
+ r = g_hash_table_lookup(throttle_records, from);
+
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct throttle_record_s));
+ r->node = strdup(from);
+ g_hash_table_insert(throttle_records, r->node, r);
+ }
+
+ r->max = max;
+ r->mode = (enum throttle_state_e) mode;
+
+ crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
+ from, load2str((enum throttle_state_e) mode), max,
+ throttle_get_job_limit(from));
+}
diff --git a/daemons/controld/controld_throttle.h b/daemons/controld/controld_throttle.h
new file mode 100644
index 0000000..a798c6c
--- /dev/null
+++ b/daemons/controld/controld_throttle.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2013-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+void throttle_init(void);
+void throttle_fini(void);
+void controld_configure_throttle(GHashTable *options);
+
+void throttle_update(xmlNode *xml);
+int throttle_get_job_limit(const char *node);
+int throttle_get_total_job_limit(int l);
diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c
new file mode 100644
index 0000000..a65bef5
--- /dev/null
+++ b/daemons/controld/controld_timers.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <time.h>
+#include <stdlib.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-controld.h>
+
+//! FSA mainloop timer type
+typedef struct fsa_timer_s {
+ guint source_id; //!< Timer source ID
+ guint period_ms; //!< Timer period
+ enum crmd_fsa_input fsa_input; //!< Input to register if timer pops
+ gboolean (*callback) (gpointer data); //!< What to do if timer pops
+ bool log_error; //!< Timer popping indicates error
+ int counter; //!< For detecting loops
+} fsa_timer_t;
+
+//! Wait before retrying a failed cib or executor connection
+static fsa_timer_t *wait_timer = NULL;
+
+//! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
+static fsa_timer_t *recheck_timer = NULL;
+
+//! Wait at start-up, or after an election, for DC to make contact
+static fsa_timer_t *election_timer = NULL;
+
+//! Delay start of new transition with expectation something else might happen
+static fsa_timer_t *transition_timer = NULL;
+
+//! join-integration-timeout
+static fsa_timer_t *integration_timer = NULL;
+
+//! join-finalization-timeout
+static fsa_timer_t *finalization_timer = NULL;
+
+//! Wait for DC to stop all resources and give us the all-clear to shut down
+fsa_timer_t *shutdown_escalation_timer = NULL;
+
+//! Cluster recheck interval (from configuration)
+static guint recheck_interval_ms = 0;
+
+static const char *
+get_timer_desc(fsa_timer_t * timer)
+{
+ if (timer == election_timer) {
+ return "Election Trigger";
+
+ } else if (timer == shutdown_escalation_timer) {
+ return "Shutdown Escalation";
+
+ } else if (timer == integration_timer) {
+ return "Integration Timer";
+
+ } else if (timer == finalization_timer) {
+ return "Finalization Timer";
+
+ } else if (timer == transition_timer) {
+ return "New Transition Timer";
+
+ } else if (timer == wait_timer) {
+ return "Wait Timer";
+
+ } else if (timer == recheck_timer) {
+ return "Cluster Recheck Timer";
+
+ }
+ return "Unknown Timer";
+}
+
+/*!
+ * \internal
+ * \brief Stop an FSA timer
+ *
+ * \param[in,out] timer Timer to stop
+ *
+ * \return true if the timer was running, or false otherwise
+ */
+static bool
+controld_stop_timer(fsa_timer_t *timer)
+{
+ CRM_CHECK(timer != NULL, return false);
+
+ if (timer->source_id != 0) {
+ crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ g_source_remove(timer->source_id);
+ timer->source_id = 0;
+
+ } else {
+ crm_trace("%s already stopped (would inject %s if popped after %ums)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms);
+ return false;
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Start an FSA timer
+ *
+ * \param[in,out] timer Timer to start
+ */
+static void
+controld_start_timer(fsa_timer_t *timer)
+{
+ if (timer->source_id == 0 && timer->period_ms > 0) {
+ timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
+ CRM_ASSERT(timer->source_id != 0);
+ crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ } else {
+ crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ }
+}
+
+/* A_DC_TIMER_STOP, A_DC_TIMER_START,
+ * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
+ * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
+ */
+void
+do_timer_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean timer_op_ok = TRUE;
+
+ if (action & A_DC_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(election_timer);
+
+ } else if (action & A_FINALIZE_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(finalization_timer);
+
+ } else if (action & A_INTEGRATE_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(integration_timer);
+ }
+
+ /* don't start a timer that wasn't already running */
+ if (action & A_DC_TIMER_START && timer_op_ok) {
+ controld_start_timer(election_timer);
+ if (AM_I_DC) {
+ /* there can be only one */
+ register_fsa_input(cause, I_ELECTION, NULL);
+ }
+
+ } else if (action & A_FINALIZE_TIMER_START) {
+ controld_start_timer(finalization_timer);
+
+ } else if (action & A_INTEGRATE_TIMER_START) {
+ controld_start_timer(integration_timer);
+ }
+}
+
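+/*!
+ * \internal
+ * \brief Handle an FSA timer popping (timeout callback)
+ *
+ * \param[in] data  Timer that popped
+ *
+ * \return Always TRUE (the timer is stopped explicitly in the callback)
+ */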
+static gboolean
+crm_timer_popped(gpointer data)
+{
+ fsa_timer_t *timer = (fsa_timer_t *) data;
+
+ if (timer->log_error) {
+ crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
+ get_timer_desc(timer),
+ fsa_state2string(controld_globals.fsa_state),
+ fsa_input2string(timer->fsa_input), timer->period_ms);
+ } else {
+ crm_info("%s just popped " CRM_XS " input=%s time=%ums",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms);
+ timer->counter++;
+ }
+
+ if ((timer == election_timer) && (election_timer->counter > 5)) {
+ crm_notice("We appear to be in an election loop, something may be wrong");
+ crm_write_blackbox(0, NULL);
+ election_timer->counter = 0;
+ }
+
+ controld_stop_timer(timer); // Make timer _not_ go off again
+
+ if (timer->fsa_input == I_INTEGRATED) {
+ crm_info("Welcomed: %d, Integrated: %d",
+ crmd_join_phase_count(crm_join_welcomed),
+ crmd_join_phase_count(crm_join_integrated));
+ if (crmd_join_phase_count(crm_join_welcomed) == 0) {
+ // If we don't even have ourselves, start again
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
+ __func__);
+
+ } else {
+ register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
+ }
+
+ } else if ((timer == recheck_timer)
+ && (controld_globals.fsa_state != S_IDLE)) {
+ crm_debug("Discarding %s event in state: %s",
+ fsa_input2string(timer->fsa_input),
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else if ((timer == finalization_timer)
+ && (controld_globals.fsa_state != S_FINALIZE_JOIN)) {
+ crm_debug("Discarding %s event in state: %s",
+ fsa_input2string(timer->fsa_input),
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else if (timer->fsa_input != I_NULL) {
+ register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
+ }
+
+ controld_trigger_fsa();
+
+ return TRUE;
+}
+
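+/*!
+ * \internal
+ * \brief Allocate and initialize all FSA timers
+ *
+ * \return TRUE on success, FALSE otherwise (out of memory)
+ */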
+bool
+controld_init_fsa_timers(void)
+{
+ transition_timer = calloc(1, sizeof(fsa_timer_t));
+ if (transition_timer == NULL) {
+ return FALSE;
+ }
+
+ integration_timer = calloc(1, sizeof(fsa_timer_t));
+ if (integration_timer == NULL) {
+ return FALSE;
+ }
+
+ finalization_timer = calloc(1, sizeof(fsa_timer_t));
+ if (finalization_timer == NULL) {
+ return FALSE;
+ }
+
+ election_timer = calloc(1, sizeof(fsa_timer_t));
+ if (election_timer == NULL) {
+ return FALSE;
+ }
+
+ shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
+ if (shutdown_escalation_timer == NULL) {
+ return FALSE;
+ }
+
+ wait_timer = calloc(1, sizeof(fsa_timer_t));
+ if (wait_timer == NULL) {
+ return FALSE;
+ }
+
+ recheck_timer = calloc(1, sizeof(fsa_timer_t));
+ if (recheck_timer == NULL) {
+ return FALSE;
+ }
+
+ election_timer->source_id = 0;
+ election_timer->period_ms = 0;
+ election_timer->fsa_input = I_DC_TIMEOUT;
+ election_timer->callback = crm_timer_popped;
+ election_timer->log_error = FALSE;
+
+ transition_timer->source_id = 0;
+ transition_timer->period_ms = 0;
+ transition_timer->fsa_input = I_PE_CALC;
+ transition_timer->callback = crm_timer_popped;
+ transition_timer->log_error = FALSE;
+
+ integration_timer->source_id = 0;
+ integration_timer->period_ms = 0;
+ integration_timer->fsa_input = I_INTEGRATED;
+ integration_timer->callback = crm_timer_popped;
+ integration_timer->log_error = TRUE;
+
+ finalization_timer->source_id = 0;
+ finalization_timer->period_ms = 0;
+ finalization_timer->fsa_input = I_FINALIZED;
+ finalization_timer->callback = crm_timer_popped;
+ finalization_timer->log_error = FALSE;
+
+ /* We can't use I_FINALIZED here, because that creates a bug in the join
+ * process where a joining node can be stuck in S_PENDING while we think it
+ * is in S_NOT_DC. This can create an infinite transition loop in which
+ * we continually send probes that the node NACKs because it's pending.
+ *
+ * If we have nodes where the cluster layer is active but the controller is
+ * not, we can avoid this causing an election/join loop, in the integration
+ * phase.
+ */
+ finalization_timer->fsa_input = I_ELECTION;
+
+ shutdown_escalation_timer->source_id = 0;
+ shutdown_escalation_timer->period_ms = 0;
+ shutdown_escalation_timer->fsa_input = I_STOP;
+ shutdown_escalation_timer->callback = crm_timer_popped;
+ shutdown_escalation_timer->log_error = TRUE;
+
+ wait_timer->source_id = 0;
+ wait_timer->period_ms = 2000;
+ wait_timer->fsa_input = I_NULL;
+ wait_timer->callback = crm_timer_popped;
+ wait_timer->log_error = FALSE;
+
+ recheck_timer->source_id = 0;
+ recheck_timer->period_ms = 0;
+ recheck_timer->fsa_input = I_PE_CALC;
+ recheck_timer->callback = crm_timer_popped;
+ recheck_timer->log_error = FALSE;
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Configure timers based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_fsa_timers(GHashTable *options)
+{
+ const char *value = NULL;
+
+ // Election timer
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_DC_DEADTIME);
+ election_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Integration timer
+ value = g_hash_table_lookup(options, "join-integration-timeout");
+ integration_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Finalization timer
+ value = g_hash_table_lookup(options, "join-finalization-timeout");
+ finalization_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Shutdown escalation timer
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FORCE_QUIT);
+ shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
+ crm_debug("Shutdown escalation occurs if DC has not responded to request "
+ "in %ums", shutdown_escalation_timer->period_ms);
+
+ // Transition timer
+ value = g_hash_table_lookup(options, "transition-delay");
+ transition_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Recheck interval
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_RECHECK);
+ recheck_interval_ms = crm_parse_interval_spec(value);
+ crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
+}
+
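+/*!
+ * \internal
+ * \brief Stop and free all FSA timers
+ */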
+void
+controld_free_fsa_timers(void)
+{
+ controld_stop_timer(transition_timer);
+ controld_stop_timer(integration_timer);
+ controld_stop_timer(finalization_timer);
+ controld_stop_timer(election_timer);
+ controld_stop_timer(shutdown_escalation_timer);
+ controld_stop_timer(wait_timer);
+ controld_stop_timer(recheck_timer);
+
+ free(transition_timer); transition_timer = NULL;
+ free(integration_timer); integration_timer = NULL;
+ free(finalization_timer); finalization_timer = NULL;
+ free(election_timer); election_timer = NULL;
+ free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
+ free(wait_timer); wait_timer = NULL;
+ free(recheck_timer); recheck_timer = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Check whether the transition timer is started
+ * \return true if the transition timer is started, or false otherwise
+ */
+bool
+controld_is_started_transition_timer(void)
+{
+ return (transition_timer->period_ms > 0)
+ && (transition_timer->source_id != 0);
+}
+
+/*!
+ * \internal
+ * \brief Start the recheck timer
+ */
+void
+controld_start_recheck_timer(void)
+{
+ // Default to recheck interval configured in CIB (if any)
+ guint period_ms = recheck_interval_ms;
+
+ // If scheduler supplied a "recheck by" time, check whether that's sooner
+ if (controld_globals.transition_graph->recheck_by > 0) {
+ time_t diff_seconds = controld_globals.transition_graph->recheck_by
+ - time(NULL);
+
+ if (diff_seconds < 1) {
+ // We're already past the desired time
+ period_ms = 500;
+ } else {
+ period_ms = (guint) diff_seconds * 1000;
+ }
+
+ // Use "recheck by" only if it's sooner than interval from CIB
+ if (period_ms > recheck_interval_ms) {
+ period_ms = recheck_interval_ms;
+ }
+ }
+
+ if (period_ms > 0) {
+ recheck_timer->period_ms = period_ms;
+ controld_start_timer(recheck_timer);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Start the wait timer
+ */
+void
+controld_start_wait_timer(void)
+{
+ controld_start_timer(wait_timer);
+}
+
+/*!
+ * \internal
+ * \brief Stop the recheck timer
+ *
+ * \return true if the recheck timer was running, or false otherwise
+ */
+bool
+controld_stop_recheck_timer(void)
+{
+ return controld_stop_timer(recheck_timer);
+}
+
+/*!
+ * \brief Get the transition timer's configured period
+ * \return The transition_timer's period
+ */
+guint
+controld_get_period_transition_timer(void)
+{
+ return transition_timer->period_ms;
+}
+
+/*!
+ * \internal
+ * \brief Reset the election timer's counter to 0
+ */
+void
+controld_reset_counter_election_timer(void)
+{
+ election_timer->counter = 0;
+}
+
+/*!
+ * \internal
+ * \brief Stop the transition timer
+ *
+ * \return true if the transition timer was running, or false otherwise
+ */
+bool
+controld_stop_transition_timer(void)
+{
+ return controld_stop_timer(transition_timer);
+}
+
+/*!
+ * \internal
+ * \brief Start the transition timer
+ */
+void
+controld_start_transition_timer(void)
+{
+ controld_start_timer(transition_timer);
+}
+
+/*!
+ * \internal
+ * \brief Start the countdown sequence for a shutdown
+ *
+ * \param[in] default_period_ms Period to use if the shutdown escalation
+ * timer's period is 0
+ */
+void
+controld_shutdown_start_countdown(guint default_period_ms)
+{
+ if (shutdown_escalation_timer->period_ms == 0) {
+ shutdown_escalation_timer->period_ms = default_period_ms;
+ }
+
+ crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums",
+ shutdown_escalation_timer->period_ms);
+ controld_start_timer(shutdown_escalation_timer);
+}
diff --git a/daemons/controld/controld_timers.h b/daemons/controld/controld_timers.h
new file mode 100644
index 0000000..587f4d1
--- /dev/null
+++ b/daemons/controld/controld_timers.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_TIMERS__H
+# define CONTROLD_TIMERS__H
+
+# include <stdbool.h> // bool
+# include <glib.h> // gboolean, gpointer, guint
+# include <controld_fsa.h> // crmd_fsa_input
+
+bool controld_init_fsa_timers(void);
+void controld_free_fsa_timers(void);
+void controld_configure_fsa_timers(GHashTable *options);
+
+bool controld_stop_recheck_timer(void);
+bool controld_stop_transition_timer(void);
+
+void controld_start_recheck_timer(void);
+void controld_start_transition_timer(void);
+void controld_start_wait_timer(void);
+
+bool controld_is_started_transition_timer(void);
+
+guint controld_get_period_transition_timer(void);
+
+void controld_reset_counter_election_timer(void);
+
+void controld_shutdown_start_countdown(guint default_period_ms);
+
+#endif
diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c
new file mode 100644
index 0000000..c8a342c
--- /dev/null
+++ b/daemons/controld/controld_transition.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
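+// Global CIB operation callback that ignores all replies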
+static void
+global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output)
+{
+}
+
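+/*!
+ * \internal
+ * \brief Create an empty transition graph that is already complete
+ *
+ * \return Newly allocated graph marked complete, with "DC Takeover" as
+ *         its abort reason
+ */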
+static pcmk__graph_t *
+create_blank_graph(void)
+{
+ pcmk__graph_t *a_graph = pcmk__unpack_graph(NULL, NULL);
+
+ a_graph->complete = true;
+ a_graph->abort_reason = "DC Takeover";
+ a_graph->completion_action = pcmk__graph_restart;
+ return a_graph;
+}
+
+/* A_TE_START, A_TE_STOP, O_TE_RESTART */
+void
+do_te_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+ gboolean init_ok = TRUE;
+
+ if (pcmk_is_set(action, A_TE_STOP)) {
+ pcmk__free_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph = NULL;
+
+ if (cib_conn != NULL) {
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ te_update_diff);
+ }
+
+ controld_clear_fsa_input_flags(R_TE_CONNECTED);
+ crm_info("Transitioner is now inactive");
+ }
+
+ if ((action & A_TE_START) == 0) {
+ return;
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register,
+ R_TE_CONNECTED)) {
+ crm_debug("The transitioner is already active");
+ return;
+
+ } else if ((action & A_TE_START) && cur_state == S_STOPPING) {
+ crm_info("Ignoring request to start the transitioner while shutting down");
+ return;
+ }
+
+ if (controld_globals.te_uuid == NULL) {
+ controld_globals.te_uuid = crm_generate_uuid();
+ crm_info("Registering TE UUID: %s", controld_globals.te_uuid);
+ }
+
+ if (cib_conn == NULL) {
+ crm_err("Could not set CIB callbacks");
+ init_ok = FALSE;
+
+ } else {
+ if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ te_update_diff) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback");
+ init_ok = FALSE;
+ }
+
+ if (cib_conn->cmds->set_op_callback(cib_conn,
+ global_cib_callback) != pcmk_ok) {
+ crm_err("Could not set CIB global callback");
+ init_ok = FALSE;
+ }
+ }
+
+ if (init_ok) {
+ controld_register_graph_functions();
+ pcmk__free_graph(controld_globals.transition_graph);
+
+ /* create a blank one */
+ crm_debug("Transitioner is now active");
+ controld_globals.transition_graph = create_blank_graph();
+ controld_set_fsa_input_flags(R_TE_CONNECTED);
+ }
+}
+
+/* A_TE_INVOKE, A_TE_CANCEL */
+void
+do_te_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+
+ if (!AM_I_DC
+ || ((controld_globals.fsa_state != S_TRANSITION_ENGINE)
+ && pcmk_is_set(action, A_TE_INVOKE))) {
+ crm_notice("No need to invoke the TE (%s) in state %s",
+ fsa_action2string(action),
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+ }
+
+ if (action & A_TE_CANCEL) {
+ crm_debug("Cancelling the transition: %sactive",
+ controld_globals.transition_graph->complete? "in" : "");
+ abort_transition(INFINITY, pcmk__graph_restart, "Peer Cancelled", NULL);
+ if (!controld_globals.transition_graph->complete) {
+ crmd_fsa_stall(FALSE);
+ }
+
+ } else if (action & A_TE_HALT) {
+ abort_transition(INFINITY, pcmk__graph_wait, "Peer Halt", NULL);
+ if (!controld_globals.transition_graph->complete) {
+ crmd_fsa_stall(FALSE);
+ }
+
+ } else if (action & A_TE_INVOKE) {
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ xmlNode *graph_data = input->xml;
+ const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE);
+ const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH);
+ const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT);
+
+ if (graph_file == NULL && graph_data == NULL) {
+ crm_log_xml_err(input->msg, "Bad command");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ return;
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ crm_info("Another transition is already active");
+ abort_transition(INFINITY, pcmk__graph_restart, "Transition Active",
+ NULL);
+ return;
+ }
+
+ if ((controld_globals.fsa_pe_ref == NULL)
+ || !pcmk__str_eq(controld_globals.fsa_pe_ref, ref,
+ pcmk__str_none)) {
+ crm_info("Transition is redundant: %s expected but %s received",
+ pcmk__s(controld_globals.fsa_pe_ref, "no reference"),
+ pcmk__s(ref, "no reference"));
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Transition Redundant", NULL);
+ }
+
+ if (graph_data == NULL && graph_file != NULL) {
+ graph_data = filename2xml(graph_file);
+ }
+
+ if (controld_is_started_transition_timer()) {
+ crm_debug("The transitioner wait for a transition timer");
+ return;
+ }
+
+ CRM_CHECK(graph_data != NULL,
+ crm_err("Input raised by %s is invalid", msg_data->origin);
+ crm_log_xml_err(input->msg, "Bad command");
+ return);
+
+ pcmk__free_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph = pcmk__unpack_graph(graph_data,
+ graph_input);
+ CRM_CHECK(controld_globals.transition_graph != NULL,
+ controld_globals.transition_graph = create_blank_graph();
+ return);
+ crm_info("Processing graph %d (ref=%s) derived from %s",
+ controld_globals.transition_graph->id, ref, graph_input);
+
+ te_reset_job_counts();
+
+ trigger_graph();
+ pcmk__log_graph(LOG_TRACE, controld_globals.transition_graph);
+
+ if (graph_data != input->xml) {
+ free_xml(graph_data);
+ }
+ }
+}
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
new file mode 100644
index 0000000..2da4221
--- /dev/null
+++ b/daemons/controld/controld_transition.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef TENGINE__H
+# define TENGINE__H
+
+# include <crm/common/mainloop.h>
+# include <crm/stonith-ng.h>
+# include <crm/services.h>
+# include <pacemaker-internal.h>
+
+/* tengine */
+pcmk__graph_action_t *match_down_event(const char *target);
+pcmk__graph_action_t *get_cancel_action(const char *id, const char *node);
+bool confirm_cancel_action(const char *id, const char *node_id);
+
+void controld_record_action_timeout(pcmk__graph_action_t *action);
+
+void controld_destroy_outside_events_table(void);
+void controld_remove_all_outside_events(void);
+
+gboolean fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node);
+void process_graph_event(xmlNode *event, const char *event_node);
+
+/* utils */
+pcmk__graph_action_t *controld_get_action(int id);
+gboolean stop_te_timer(pcmk__graph_action_t *action);
+const char *get_rsc_state(const char *task, enum pcmk_exec_status status);
+
+void process_te_message(xmlNode *msg, xmlNode *xml_data);
+
+void controld_register_graph_functions(void);
+
+void notify_crmd(pcmk__graph_t * graph);
+
+void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data);
+gboolean action_timer_callback(gpointer data);
+void te_update_diff(const char *event, xmlNode *msg);
+
+void controld_init_transition_trigger(void);
+void controld_destroy_transition_trigger(void);
+
+void controld_trigger_graph_as(const char *fn, int line);
+void abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, guint delay_ms);
+void abort_transition_graph(int abort_priority,
+ enum pcmk__graph_next abort_action,
+ const char *abort_text, const xmlNode *reason,
+ const char *fn, int line);
+
+# define trigger_graph() controld_trigger_graph_as(__func__, __LINE__)
+# define abort_transition(pri, action, text, reason) \
+ abort_transition_graph(pri, action, text, reason,__func__,__LINE__);
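+
+/* For example, a caller can simply write
+ *     abort_transition(INFINITY, pcmk__graph_restart, "Transition Active", NULL);
+ * and the resulting abort_transition_graph() entry in the logs names the
+ * calling function and line.
+ */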
+
+void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph);
+void te_reset_job_counts(void);
+
+#endif
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
new file mode 100644
index 0000000..4ce09d9
--- /dev/null
+++ b/daemons/controld/controld_utils.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdlib.h>
+#include <stdint.h> // uint64_t
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+const char *
+fsa_input2string(enum crmd_fsa_input input)
+{
+ const char *inputAsText = NULL;
+
+ switch (input) {
+ case I_NULL:
+ inputAsText = "I_NULL";
+ break;
+ case I_CIB_OP:
+ inputAsText = "I_CIB_OP (unused)";
+ break;
+ case I_CIB_UPDATE:
+ inputAsText = "I_CIB_UPDATE";
+ break;
+ case I_DC_TIMEOUT:
+ inputAsText = "I_DC_TIMEOUT";
+ break;
+ case I_ELECTION:
+ inputAsText = "I_ELECTION";
+ break;
+ case I_PE_CALC:
+ inputAsText = "I_PE_CALC";
+ break;
+ case I_RELEASE_DC:
+ inputAsText = "I_RELEASE_DC";
+ break;
+ case I_ELECTION_DC:
+ inputAsText = "I_ELECTION_DC";
+ break;
+ case I_ERROR:
+ inputAsText = "I_ERROR";
+ break;
+ case I_FAIL:
+ inputAsText = "I_FAIL";
+ break;
+ case I_INTEGRATED:
+ inputAsText = "I_INTEGRATED";
+ break;
+ case I_FINALIZED:
+ inputAsText = "I_FINALIZED";
+ break;
+ case I_NODE_JOIN:
+ inputAsText = "I_NODE_JOIN";
+ break;
+ case I_JOIN_OFFER:
+ inputAsText = "I_JOIN_OFFER";
+ break;
+ case I_JOIN_REQUEST:
+ inputAsText = "I_JOIN_REQUEST";
+ break;
+ case I_JOIN_RESULT:
+ inputAsText = "I_JOIN_RESULT";
+ break;
+ case I_NOT_DC:
+ inputAsText = "I_NOT_DC";
+ break;
+ case I_RECOVERED:
+ inputAsText = "I_RECOVERED";
+ break;
+ case I_RELEASE_FAIL:
+ inputAsText = "I_RELEASE_FAIL";
+ break;
+ case I_RELEASE_SUCCESS:
+ inputAsText = "I_RELEASE_SUCCESS";
+ break;
+ case I_RESTART:
+ inputAsText = "I_RESTART";
+ break;
+ case I_PE_SUCCESS:
+ inputAsText = "I_PE_SUCCESS";
+ break;
+ case I_ROUTER:
+ inputAsText = "I_ROUTER";
+ break;
+ case I_SHUTDOWN:
+ inputAsText = "I_SHUTDOWN";
+ break;
+ case I_STARTUP:
+ inputAsText = "I_STARTUP";
+ break;
+ case I_TE_SUCCESS:
+ inputAsText = "I_TE_SUCCESS";
+ break;
+ case I_STOP:
+ inputAsText = "I_STOP";
+ break;
+ case I_DC_HEARTBEAT:
+ inputAsText = "I_DC_HEARTBEAT";
+ break;
+ case I_WAIT_FOR_EVENT:
+ inputAsText = "I_WAIT_FOR_EVENT";
+ break;
+ case I_LRM_EVENT:
+ inputAsText = "I_LRM_EVENT";
+ break;
+ case I_PENDING:
+ inputAsText = "I_PENDING";
+ break;
+ case I_HALT:
+ inputAsText = "I_HALT";
+ break;
+ case I_TERMINATE:
+ inputAsText = "I_TERMINATE";
+ break;
+ case I_ILLEGAL:
+ inputAsText = "I_ILLEGAL";
+ break;
+ }
+
+ if (inputAsText == NULL) {
+ crm_err("Input %d is unknown", input);
+ inputAsText = "<UNKNOWN_INPUT>";
+ }
+
+ return inputAsText;
+}
+
+const char *
+fsa_state2string(enum crmd_fsa_state state)
+{
+ const char *stateAsText = NULL;
+
+ switch (state) {
+ case S_IDLE:
+ stateAsText = "S_IDLE";
+ break;
+ case S_ELECTION:
+ stateAsText = "S_ELECTION";
+ break;
+ case S_INTEGRATION:
+ stateAsText = "S_INTEGRATION";
+ break;
+ case S_FINALIZE_JOIN:
+ stateAsText = "S_FINALIZE_JOIN";
+ break;
+ case S_NOT_DC:
+ stateAsText = "S_NOT_DC";
+ break;
+ case S_POLICY_ENGINE:
+ stateAsText = "S_POLICY_ENGINE";
+ break;
+ case S_RECOVERY:
+ stateAsText = "S_RECOVERY";
+ break;
+ case S_RELEASE_DC:
+ stateAsText = "S_RELEASE_DC";
+ break;
+ case S_PENDING:
+ stateAsText = "S_PENDING";
+ break;
+ case S_STOPPING:
+ stateAsText = "S_STOPPING";
+ break;
+ case S_TERMINATE:
+ stateAsText = "S_TERMINATE";
+ break;
+ case S_TRANSITION_ENGINE:
+ stateAsText = "S_TRANSITION_ENGINE";
+ break;
+ case S_STARTING:
+ stateAsText = "S_STARTING";
+ break;
+ case S_HALT:
+ stateAsText = "S_HALT";
+ break;
+ case S_ILLEGAL:
+ stateAsText = "S_ILLEGAL";
+ break;
+ }
+
+ if (stateAsText == NULL) {
+ crm_err("State %d is unknown", state);
+ stateAsText = "<UNKNOWN_STATE>";
+ }
+
+ return stateAsText;
+}
+
+const char *
+fsa_cause2string(enum crmd_fsa_cause cause)
+{
+ const char *causeAsText = NULL;
+
+ switch (cause) {
+ case C_UNKNOWN:
+ causeAsText = "C_UNKNOWN";
+ break;
+ case C_STARTUP:
+ causeAsText = "C_STARTUP";
+ break;
+ case C_IPC_MESSAGE:
+ causeAsText = "C_IPC_MESSAGE";
+ break;
+ case C_HA_MESSAGE:
+ causeAsText = "C_HA_MESSAGE";
+ break;
+ case C_TIMER_POPPED:
+ causeAsText = "C_TIMER_POPPED";
+ break;
+ case C_SHUTDOWN:
+ causeAsText = "C_SHUTDOWN";
+ break;
+ case C_LRM_OP_CALLBACK:
+ causeAsText = "C_LRM_OP_CALLBACK";
+ break;
+ case C_CRMD_STATUS_CALLBACK:
+ causeAsText = "C_CRMD_STATUS_CALLBACK";
+ break;
+ case C_FSA_INTERNAL:
+ causeAsText = "C_FSA_INTERNAL";
+ break;
+ }
+
+ if (causeAsText == NULL) {
+ crm_err("Cause %d is unknown", cause);
+ causeAsText = "<UNKNOWN_CAUSE>";
+ }
+
+ return causeAsText;
+}
+
+const char *
+fsa_action2string(long long action)
+{
+ const char *actionAsText = NULL;
+
+ switch (action) {
+
+ case A_NOTHING:
+ actionAsText = "A_NOTHING";
+ break;
+ case A_ELECTION_START:
+ actionAsText = "A_ELECTION_START";
+ break;
+ case A_DC_JOIN_FINAL:
+ actionAsText = "A_DC_JOIN_FINAL";
+ break;
+ case A_READCONFIG:
+ actionAsText = "A_READCONFIG";
+ break;
+ case O_RELEASE:
+ actionAsText = "O_RELEASE";
+ break;
+ case A_STARTUP:
+ actionAsText = "A_STARTUP";
+ break;
+ case A_STARTED:
+ actionAsText = "A_STARTED";
+ break;
+ case A_HA_CONNECT:
+ actionAsText = "A_HA_CONNECT";
+ break;
+ case A_HA_DISCONNECT:
+ actionAsText = "A_HA_DISCONNECT";
+ break;
+ case A_LRM_CONNECT:
+ actionAsText = "A_LRM_CONNECT";
+ break;
+ case A_LRM_EVENT:
+ actionAsText = "A_LRM_EVENT";
+ break;
+ case A_LRM_INVOKE:
+ actionAsText = "A_LRM_INVOKE";
+ break;
+ case A_LRM_DISCONNECT:
+ actionAsText = "A_LRM_DISCONNECT";
+ break;
+ case O_LRM_RECONNECT:
+ actionAsText = "O_LRM_RECONNECT";
+ break;
+ case A_CL_JOIN_QUERY:
+ actionAsText = "A_CL_JOIN_QUERY";
+ break;
+ case A_DC_TIMER_STOP:
+ actionAsText = "A_DC_TIMER_STOP";
+ break;
+ case A_DC_TIMER_START:
+ actionAsText = "A_DC_TIMER_START";
+ break;
+ case A_INTEGRATE_TIMER_START:
+ actionAsText = "A_INTEGRATE_TIMER_START";
+ break;
+ case A_INTEGRATE_TIMER_STOP:
+ actionAsText = "A_INTEGRATE_TIMER_STOP";
+ break;
+ case A_FINALIZE_TIMER_START:
+ actionAsText = "A_FINALIZE_TIMER_START";
+ break;
+ case A_FINALIZE_TIMER_STOP:
+ actionAsText = "A_FINALIZE_TIMER_STOP";
+ break;
+ case A_ELECTION_COUNT:
+ actionAsText = "A_ELECTION_COUNT";
+ break;
+ case A_ELECTION_VOTE:
+ actionAsText = "A_ELECTION_VOTE";
+ break;
+ case A_ELECTION_CHECK:
+ actionAsText = "A_ELECTION_CHECK";
+ break;
+ case A_CL_JOIN_ANNOUNCE:
+ actionAsText = "A_CL_JOIN_ANNOUNCE";
+ break;
+ case A_CL_JOIN_REQUEST:
+ actionAsText = "A_CL_JOIN_REQUEST";
+ break;
+ case A_CL_JOIN_RESULT:
+ actionAsText = "A_CL_JOIN_RESULT";
+ break;
+ case A_DC_JOIN_OFFER_ALL:
+ actionAsText = "A_DC_JOIN_OFFER_ALL";
+ break;
+ case A_DC_JOIN_OFFER_ONE:
+ actionAsText = "A_DC_JOIN_OFFER_ONE";
+ break;
+ case A_DC_JOIN_PROCESS_REQ:
+ actionAsText = "A_DC_JOIN_PROCESS_REQ";
+ break;
+ case A_DC_JOIN_PROCESS_ACK:
+ actionAsText = "A_DC_JOIN_PROCESS_ACK";
+ break;
+ case A_DC_JOIN_FINALIZE:
+ actionAsText = "A_DC_JOIN_FINALIZE";
+ break;
+ case A_MSG_PROCESS:
+ actionAsText = "A_MSG_PROCESS";
+ break;
+ case A_MSG_ROUTE:
+ actionAsText = "A_MSG_ROUTE";
+ break;
+ case A_RECOVER:
+ actionAsText = "A_RECOVER";
+ break;
+ case A_DC_RELEASE:
+ actionAsText = "A_DC_RELEASE";
+ break;
+ case A_DC_RELEASED:
+ actionAsText = "A_DC_RELEASED";
+ break;
+ case A_DC_TAKEOVER:
+ actionAsText = "A_DC_TAKEOVER";
+ break;
+ case A_SHUTDOWN:
+ actionAsText = "A_SHUTDOWN";
+ break;
+ case A_SHUTDOWN_REQ:
+ actionAsText = "A_SHUTDOWN_REQ";
+ break;
+ case A_STOP:
+ actionAsText = "A_STOP ";
+ break;
+ case A_EXIT_0:
+ actionAsText = "A_EXIT_0";
+ break;
+ case A_EXIT_1:
+ actionAsText = "A_EXIT_1";
+ break;
+ case O_CIB_RESTART:
+ actionAsText = "O_CIB_RESTART";
+ break;
+ case A_CIB_START:
+ actionAsText = "A_CIB_START";
+ break;
+ case A_CIB_STOP:
+ actionAsText = "A_CIB_STOP";
+ break;
+ case A_TE_INVOKE:
+ actionAsText = "A_TE_INVOKE";
+ break;
+ case O_TE_RESTART:
+ actionAsText = "O_TE_RESTART";
+ break;
+ case A_TE_START:
+ actionAsText = "A_TE_START";
+ break;
+ case A_TE_STOP:
+ actionAsText = "A_TE_STOP";
+ break;
+ case A_TE_HALT:
+ actionAsText = "A_TE_HALT";
+ break;
+ case A_TE_CANCEL:
+ actionAsText = "A_TE_CANCEL";
+ break;
+ case A_PE_INVOKE:
+ actionAsText = "A_PE_INVOKE";
+ break;
+ case O_PE_RESTART:
+ actionAsText = "O_PE_RESTART";
+ break;
+ case A_PE_START:
+ actionAsText = "A_PE_START";
+ break;
+ case A_PE_STOP:
+ actionAsText = "A_PE_STOP";
+ break;
+ case A_NODE_BLOCK:
+ actionAsText = "A_NODE_BLOCK";
+ break;
+ case A_UPDATE_NODESTATUS:
+ actionAsText = "A_UPDATE_NODESTATUS";
+ break;
+ case A_LOG:
+ actionAsText = "A_LOG ";
+ break;
+ case A_ERROR:
+ actionAsText = "A_ERROR ";
+ break;
+ case A_WARN:
+ actionAsText = "A_WARN ";
+ break;
+ /* Composite actions */
+ case A_DC_TIMER_START | A_CL_JOIN_QUERY:
+ actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY";
+ break;
+ }
+
+ if (actionAsText == NULL) {
+ crm_err("Action %.16llx is unknown", action);
+ actionAsText = "<UNKNOWN_ACTION>";
+ }
+
+ return actionAsText;
+}
+
+void
+fsa_dump_inputs(int log_level, const char *text, long long input_register)
+{
+ if (input_register == A_NOTHING) {
+ return;
+ }
+ if (text == NULL) {
+ text = "Input register contents:";
+ }
+
+ if (pcmk_is_set(input_register, R_THE_DC)) {
+ crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC);
+ }
+ if (pcmk_is_set(input_register, R_STARTING)) {
+ crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING);
+ }
+ if (pcmk_is_set(input_register, R_SHUTDOWN)) {
+ crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN);
+ }
+ if (pcmk_is_set(input_register, R_STAYDOWN)) {
+ crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN);
+ }
+ if (pcmk_is_set(input_register, R_JOIN_OK)) {
+ crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK);
+ }
+ if (pcmk_is_set(input_register, R_READ_CONFIG)) {
+ crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG);
+ }
+ if (pcmk_is_set(input_register, R_INVOKE_PE)) {
+ crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE);
+ }
+ if (pcmk_is_set(input_register, R_CIB_CONNECTED)) {
+ crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_PE_CONNECTED)) {
+ crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_TE_CONNECTED)) {
+ crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_LRM_CONNECTED)) {
+ crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_CIB_REQUIRED)) {
+ crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_PE_REQUIRED)) {
+ crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_TE_REQUIRED)) {
+ crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_REQ_PEND)) {
+ crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND);
+ }
+ if (pcmk_is_set(input_register, R_PE_PEND)) {
+ crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND);
+ }
+ if (pcmk_is_set(input_register, R_TE_PEND)) {
+ crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND);
+ }
+ if (pcmk_is_set(input_register, R_RESP_PEND)) {
+ crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND);
+ }
+ if (pcmk_is_set(input_register, R_CIB_DONE)) {
+ crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE);
+ }
+ if (pcmk_is_set(input_register, R_HAVE_CIB)) {
+ crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB);
+ }
+ if (pcmk_is_set(input_register, R_MEMBERSHIP)) {
+ crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP);
+ }
+ if (pcmk_is_set(input_register, R_PEER_DATA)) {
+ crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA);
+ }
+ if (pcmk_is_set(input_register, R_IN_RECOVERY)) {
+ crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY);
+ }
+}
+
+void
+fsa_dump_actions(uint64_t action, const char *text)
+{
+ if (pcmk_is_set(action, A_READCONFIG)) {
+ crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text);
+ }
+ if (pcmk_is_set(action, A_STARTUP)) {
+ crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text);
+ }
+ if (pcmk_is_set(action, A_STARTED)) {
+ crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text);
+ }
+    if (pcmk_is_set(action, A_HA_CONNECT)) {
+        crm_trace("Action %.16llx (A_HA_CONNECT) %s", A_HA_CONNECT, text);
+    }
+    if (pcmk_is_set(action, A_HA_DISCONNECT)) {
+        crm_trace("Action %.16llx (A_HA_DISCONNECT) %s", A_HA_DISCONNECT, text);
+    }
+ if (pcmk_is_set(action, A_LRM_CONNECT)) {
+ crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text);
+ }
+ if (pcmk_is_set(action, A_LRM_EVENT)) {
+ crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text);
+ }
+ if (pcmk_is_set(action, A_LRM_INVOKE)) {
+ crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_LRM_DISCONNECT)) {
+ crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text);
+ }
+ if (pcmk_is_set(action, A_DC_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_DC_TIMER_START)) {
+ crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) {
+ crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) {
+ crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_COUNT)) {
+ crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_VOTE)) {
+ crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_CHECK)) {
+ crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_RESULT)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text);
+ }
+ if (pcmk_is_set(action, A_MSG_PROCESS)) {
+ crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text);
+ }
+ if (pcmk_is_set(action, A_MSG_ROUTE)) {
+ crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text);
+ }
+ if (pcmk_is_set(action, A_RECOVER)) {
+ crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text);
+ }
+ if (pcmk_is_set(action, A_DC_RELEASE)) {
+ crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text);
+ }
+ if (pcmk_is_set(action, A_DC_RELEASED)) {
+ crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text);
+ }
+ if (pcmk_is_set(action, A_DC_TAKEOVER)) {
+ crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text);
+ }
+ if (pcmk_is_set(action, A_SHUTDOWN)) {
+ crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text);
+ }
+ if (pcmk_is_set(action, A_SHUTDOWN_REQ)) {
+ crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text);
+ }
+ if (pcmk_is_set(action, A_STOP)) {
+ crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text);
+ }
+ if (pcmk_is_set(action, A_EXIT_0)) {
+ crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text);
+ }
+ if (pcmk_is_set(action, A_EXIT_1)) {
+ crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text);
+ }
+ if (pcmk_is_set(action, A_CIB_START)) {
+ crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text);
+ }
+ if (pcmk_is_set(action, A_CIB_STOP)) {
+ crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text);
+ }
+ if (pcmk_is_set(action, A_TE_INVOKE)) {
+ crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_TE_START)) {
+ crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text);
+ }
+ if (pcmk_is_set(action, A_TE_STOP)) {
+ crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text);
+ }
+ if (pcmk_is_set(action, A_TE_CANCEL)) {
+ crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text);
+ }
+ if (pcmk_is_set(action, A_PE_INVOKE)) {
+ crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_PE_START)) {
+ crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text);
+ }
+ if (pcmk_is_set(action, A_PE_STOP)) {
+ crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text);
+ }
+ if (pcmk_is_set(action, A_NODE_BLOCK)) {
+ crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text);
+ }
+ if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) {
+ crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text);
+ }
+ if (pcmk_is_set(action, A_LOG)) {
+ crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text);
+ }
+ if (pcmk_is_set(action, A_ERROR)) {
+ crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text);
+ }
+ if (pcmk_is_set(action, A_WARN)) {
+ crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text);
+ }
+}
+
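+/* Contract sketch: update_dc(NULL) clears the DC (welcome_from stays NULL, so
+ * dc_name is unset and "Unset DC" is logged). A message carrying F_CRM_VERSION
+ * and F_CRM_HOST_FROM sets or confirms the DC, and is rejected if it conflicts
+ * with our own DC role or the currently known DC.
+ */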
+gboolean
+update_dc(xmlNode * msg)
+{
+ char *last_dc = controld_globals.dc_name;
+ const char *dc_version = NULL;
+ const char *welcome_from = NULL;
+
+ if (msg != NULL) {
+ gboolean invalid = FALSE;
+
+ dc_version = crm_element_value(msg, F_CRM_VERSION);
+ welcome_from = crm_element_value(msg, F_CRM_HOST_FROM);
+
+ CRM_CHECK(dc_version != NULL, return FALSE);
+ CRM_CHECK(welcome_from != NULL, return FALSE);
+
+ if (AM_I_DC
+ && !pcmk__str_eq(welcome_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ invalid = TRUE;
+
+ } else if ((controld_globals.dc_name != NULL)
+ && !pcmk__str_eq(welcome_from, controld_globals.dc_name,
+ pcmk__str_casei)) {
+ invalid = TRUE;
+ }
+
+ if (invalid) {
+ if (AM_I_DC) {
+ crm_err("Not updating DC to %s (%s): we are also a DC",
+ welcome_from, dc_version);
+ } else {
+ crm_warn("New DC %s is not %s",
+ welcome_from, controld_globals.dc_name);
+ }
+
+ controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START);
+ controld_trigger_fsa();
+ return FALSE;
+ }
+ }
+
+ controld_globals.dc_name = NULL; // freed as last_dc
+ pcmk__str_update(&(controld_globals.dc_name), welcome_from);
+ pcmk__str_update(&(controld_globals.dc_version), dc_version);
+
+ if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) {
+ /* do nothing */
+
+ } else if (controld_globals.dc_name != NULL) {
+ crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name);
+
+ crm_info("Set DC to %s (%s)",
+ controld_globals.dc_name,
+ pcmk__s(controld_globals.dc_version, "unknown version"));
+ pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER);
+
+ } else if (last_dc != NULL) {
+ crm_info("Unset DC (was %s)", last_dc);
+ }
+
+ free(last_dc);
+ return TRUE;
+}
+
+void
+crmd_peer_down(crm_node_t *peer, bool full)
+{
+    if (full && (peer->state == NULL)) {
+ pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0);
+ crm_update_peer_proc(__func__, peer, crm_proc_none, NULL);
+ }
+ crm_update_peer_join(__func__, peer, crm_join_none);
+ pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
+}
+
+/*!
+ * \internal
+ * \brief Check feature set compatibility of DC and joining node
+ *
+ * Return true if a joining node's CRM feature set is compatible with the
+ * current DC's. The feature sets are compatible if they have the same major
+ * version number, and the DC's minor version number is the same or older than
+ * the joining node's. The minor-minor version is intended solely to allow
+ * resource agents to detect feature support, and so is ignored.
+ *
+ * \param[in] dc_version DC's feature set
+ * \param[in] join_version Joining node's feature set
+ */
+bool
+feature_set_compatible(const char *dc_version, const char *join_version)
+{
+ char *dc_minor = NULL;
+ char *join_minor = NULL;
+ long dc_v = 0;
+ long join_v = 0;
+
+ // Get DC's major version
+ errno = 0;
+ dc_v = strtol(dc_version, &dc_minor, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Get joining node's major version
+ errno = 0;
+ join_v = strtol(join_version, &join_minor, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Major version component must be identical
+ if (dc_v != join_v) {
+ return FALSE;
+ }
+
+ // Get DC's minor version
+ if (*dc_minor == '.') {
+ ++dc_minor;
+ }
+ errno = 0;
+ dc_v = strtol(dc_minor, NULL, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Get joining node's minor version
+ if (*join_minor == '.') {
+ ++join_minor;
+ }
+ errno = 0;
+ join_v = strtol(join_minor, NULL, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // DC's minor version must be the same or older
+ return dc_v <= join_v;
+}
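+
+/* A minimal sketch (hypothetical, never compiled into the daemon) showing how
+ * the comparison above behaves; the guard macro is assumed for illustration.
+ */
+#ifdef CONTROLD_UTILS_EXAMPLES
+static void
+feature_set_compatible_examples(void)
+{
+    CRM_ASSERT(feature_set_compatible("3.15.0", "3.17.2"));  // DC minor older
+    CRM_ASSERT(!feature_set_compatible("3.17.2", "3.15.0")); // DC minor newer
+    CRM_ASSERT(!feature_set_compatible("2.15.0", "3.15.0")); // majors differ
+    CRM_ASSERT(feature_set_compatible("3.15.0", "3.15.9"));  // minor-minor ignored
+}
+#endif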
+
+const char *
+get_node_id(xmlNode *lrm_rsc_op)
+{
+ xmlNode *node = lrm_rsc_op;
+
+ while (node != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(node), pcmk__str_casei)) {
+ node = node->parent;
+ }
+
+ CRM_CHECK(node != NULL, return NULL);
+ return ID(node);
+}
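+
+/* Context: in the CIB status section, an lrm_rsc_op element typically sits
+ * under its node's entry, roughly
+ *   <node_state><lrm><lrm_resources><lrm_resource><lrm_rsc_op .../>
+ * so walking up the parent chain, as above, reaches the enclosing node_state
+ * element, whose id identifies the node.
+ */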
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
new file mode 100644
index 0000000..6ce413d
--- /dev/null
+++ b/daemons/controld/controld_utils.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_UTILS__H
+# define CRMD_UTILS__H
+
+# include <crm/crm.h>
+# include <crm/common/xml.h>
+
+# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+
+enum node_update_flags {
+ node_update_none = 0x0000,
+ node_update_quick = 0x0001,
+ node_update_cluster = 0x0010,
+ node_update_peer = 0x0020,
+ node_update_join = 0x0040,
+ node_update_expected = 0x0100,
+ node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected,
+};
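+
+/* These are bit flags, so (for example) a hypothetical caller might pass
+ *   populate_cib_nodes(node_update_quick|node_update_cluster, __func__);
+ * Note that node_update_all combines the four content flags but does not
+ * include node_update_quick.
+ */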
+
+crm_exit_t crmd_exit(crm_exit_t exit_code);
+_Noreturn void crmd_fast_exit(crm_exit_t exit_code);
+void controld_shutdown_schedulerd_ipc(void);
+void controld_stop_sched_timer(void);
+void controld_free_sched_timer(void);
+void controld_expect_sched_reply(char *ref);
+
+void fsa_dump_actions(uint64_t action, const char *text);
+void fsa_dump_inputs(int log_level, const char *text, long long input_register);
+
+gboolean update_dc(xmlNode * msg);
+void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase);
+xmlNode *create_node_state_update(crm_node_t *node, int flags,
+ xmlNode *parent, const char *source);
+void populate_cib_nodes(enum node_update_flags flags, const char *source);
+void crm_update_quorum(gboolean quorum, gboolean force_update);
+void controld_close_attrd_ipc(void);
+void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
+void update_attrd_list(GList *attrs, uint32_t opts);
+void update_attrd_remote_node_removed(const char *host, const char *user_name);
+void update_attrd_clear_failures(const char *host, const char *rsc,
+ const char *op, const char *interval_spec,
+ gboolean is_remote_node);
+
+int crmd_join_phase_count(enum crm_join_phase phase);
+void crmd_join_phase_log(int level);
+
+void crmd_peer_down(crm_node_t *peer, bool full);
+
+bool feature_set_compatible(const char *dc_version, const char *join_version);
+
+const char *get_node_id(xmlNode *lrm_rsc_op);
+
+#endif
diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c
new file mode 100644
index 0000000..5858898
--- /dev/null
+++ b/daemons/controld/pacemaker-controld.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \
+ "to events"
+
+_Noreturn void crmd_init(void);
+extern void init_dotfile(void);
+
+controld_globals_t controld_globals = {
+ // Automatic initialization to 0, false, or NULL is fine for most members
+ .fsa_state = S_STARTING,
+ .fsa_actions = A_NOTHING,
+};
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ return pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_exit_t exit_code = CRM_EX_OK;
+ bool initialize = true;
+
+ crm_ipc_t *old_instance = NULL;
+
+ pcmk__output_t *out = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ initialize = false;
+ goto done;
+ }
+
+ if ((g_strv_length(processed_args) >= 2)
+ && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
+ crmd_metadata();
+ initialize = false;
+ goto done;
+ }
+
+ pcmk__cli_init_logging("pacemaker-controld", args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ crm_notice("Starting Pacemaker controller");
+
+ old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0);
+ if (old_instance == NULL) {
+ /* crm_ipc_new will have already printed an error message with crm_err. */
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("pacemaker-controld is already active, aborting startup");
+ initialize = false;
+ goto done;
+
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
+ if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Bad permissions on " PE_STATE_DIR
+ " (see logs for details)");
+ goto done;
+
+ } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Bad permissions on " CRM_CONFIG_DIR
+ " (see logs for details)");
+ goto done;
+ }
+
+ if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+
+ if ((exit_code == CRM_EX_OK) && initialize) {
+ // Does not return
+ crmd_init();
+ }
+ crm_exit(exit_code);
+}
+
+void
+crmd_init(void)
+{
+ crm_exit_t exit_code = CRM_EX_OK;
+ enum crmd_fsa_state state;
+
+ init_dotfile();
+ register_fsa_input(C_STARTUP, I_STARTUP, NULL);
+
+ crm_peer_init();
+ state = s_crmd_fsa(C_STARTUP);
+
+ if (state == S_PENDING || state == S_STARTING) {
+ /* Create the mainloop and run it... */
+ crm_trace("Starting %s's mainloop", crm_system_name);
+ controld_globals.mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(controld_globals.mainloop);
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
+ crm_info("Inhibiting automated respawn");
+ exit_code = CRM_EX_FATAL;
+ }
+
+ } else {
+ crm_err("Startup of %s failed. Current state: %s",
+ crm_system_name, fsa_state2string(state));
+ exit_code = CRM_EX_ERROR;
+ }
+
+ crm_info("%s[%lu] exiting with status %d (%s)",
+ crm_system_name, (unsigned long) getpid(), exit_code,
+ crm_exit_str(exit_code));
+
+ crmd_fast_exit(exit_code);
+}
diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h
new file mode 100644
index 0000000..1484a00
--- /dev/null
+++ b/daemons/controld/pacemaker-controld.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD__H
+# define CRMD__H
+
+#include <controld_alerts.h>
+#include <controld_callbacks.h>
+#include <controld_cib.h>
+#include <controld_fencing.h>
+#include <controld_fsa.h>
+#include <controld_globals.h>
+#include <controld_timers.h>
+#include <controld_lrm.h>
+#include <controld_membership.h>
+#include <controld_messages.h>
+#include <controld_metadata.h>
+#include <controld_throttle.h>
+#include <controld_transition.h>
+#include <controld_utils.h>
+
+# define controld_trigger_config() \
+ controld_trigger_config_as(__func__, __LINE__)
+
+void crmd_metadata(void);
+void controld_trigger_config_as(const char *fn, int line);
+void controld_election_init(const char *uname);
+void controld_configure_election(GHashTable *options);
+void controld_remove_voter(const char *uname);
+void controld_election_fini(void);
+void controld_stop_current_election_timeout(void);
+
+#endif
diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am
new file mode 100644
index 0000000..466f0df
--- /dev/null
+++ b/daemons/execd/Makefile.am
@@ -0,0 +1,76 @@
+#
+# Copyright 2012-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU Lesser General Public License
+# version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-execd cts-exec-helper
+
+EXTRA_DIST = pacemaker-remoted.8.inc
+
+pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/fencing/libstonithd.la
+pacemaker_execd_SOURCES = pacemaker-execd.c execd_commands.c \
+ execd_alerts.c
+
+if BUILD_REMOTE
+sbin_PROGRAMS = pacemaker-remoted
+if BUILD_SYSTEMD
+systemdsystemunit_DATA = pacemaker_remote.service
+else
+initdir = $(INITDIR)
+init_SCRIPTS = pacemaker_remote
+endif
+
+pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE $(AM_CPPFLAGS)
+
+pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \
+ $(top_builddir)/lib/lrmd/liblrmd.la
+pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \
+ remoted_tls.c remoted_pidone.c remoted_proxy.c
+endif
+
+cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/pengine/libpe_status.la
+cts_exec_helper_SOURCES = cts-exec-helper.c
+
+noinst_HEADERS = pacemaker-execd.h
+
+CLEANFILES = $(man8_MANS)
+
+# Always create a symlink for the old pacemaker_remoted name, so that bundle
+# container images using a current Pacemaker will run on cluster nodes running
+# Pacemaker 1 (>=1.1.17).
+install-exec-hook:
+if BUILD_LEGACY_LINKS
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd && $(LN_S) pacemaker-execd lrmd
+endif
+if BUILD_REMOTE
+ cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted && $(LN_S) pacemaker-remoted pacemaker_remoted
+endif
+
+uninstall-hook:
+if BUILD_LEGACY_LINKS
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd
+endif
+if BUILD_REMOTE
+ cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted
+endif
diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c
new file mode 100644
index 0000000..2af5e16
--- /dev/null
+++ b/daemons/execd/cts-exec-helper.c
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/services.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/mainloop.h>
+
+#include <crm/pengine/status.h>
+#include <crm/pengine/internal.h>
+#include <crm/cib.h>
+#include <crm/cib/internal.h>
+#include <crm/lrmd.h>
+
+#define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events"
+
+static int exec_call_id = 0;
+static gboolean start_test(gpointer user_data);
+static void try_connect(void);
+
+static char *key = NULL;
+static char *val = NULL;
+
+static struct {
+ int verbose;
+ int quiet;
+ guint interval_ms;
+ int timeout;
+ int start_delay;
+ int cancel_call_id;
+ gboolean no_wait;
+ gboolean is_running;
+ gboolean no_connect;
+ int exec_call_opts;
+ const char *api_call;
+ const char *rsc_id;
+ const char *provider;
+ const char *class;
+ const char *type;
+ const char *action;
+ const char *listen;
+ gboolean use_tls;
+ lrmd_key_value_t *params;
+} options;
+
+static gboolean
+interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ options.interval_ms = crm_parse_interval_spec(optarg);
+ return errno == 0;
+}
+
+static gboolean
+notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) {
+ options.exec_call_opts = lrmd_opt_notify_orig_only;
+ } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) {
+ options.exec_call_opts = lrmd_opt_notify_changes_only;
+ }
+
+ return TRUE;
+}
+
+static gboolean
+param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) {
+ pcmk__str_update(&key, optarg);
+ } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) {
+ pcmk__str_update(&val, optarg);
+ }
+
+ if (key != NULL && val != NULL) {
+ options.params = lrmd_key_value_add(options.params, key, val);
+ pcmk__str_update(&key, NULL);
+ pcmk__str_update(&val, NULL);
+ }
+
+ return TRUE;
+}
+
+static GOptionEntry basic_entries[] = {
+ { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call,
+ "Directly relates to executor API functions",
+ NULL },
+
+ { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running,
+ "Determine if a resource is registered and running",
+ NULL },
+
+ { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen,
+ "Listen for a specific event string",
+ NULL },
+
+ { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait,
+ "Make api call and do not wait for result",
+ NULL },
+
+ { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
+ "Only notify client changes to recurring operations",
+ NULL },
+
+ { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
+ "Only notify this client of the results of an API action",
+ NULL },
+
+ { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls,
+ "Use TLS backend for local connection",
+ NULL },
+
+ { NULL }
+};
+
+static GOptionEntry api_call_entries[] = {
+ { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action,
+ NULL, NULL },
+
+ { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id,
+ NULL, NULL },
+
+ { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class,
+ NULL, NULL },
+
+ { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb,
+ NULL, NULL },
+
+ { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
+ NULL, NULL },
+
+ { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
+ NULL, NULL },
+
+ { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider,
+ NULL, NULL },
+
+ { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id,
+ NULL, NULL },
+
+ { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay,
+ NULL, NULL },
+
+ { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout,
+ NULL, NULL },
+
+ { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type,
+ NULL, NULL },
+
+ { NULL }
+};
+
+static GMainLoop *mainloop = NULL;
+static lrmd_t *lrmd_conn = NULL;
+
+static char event_buf_v0[1024];
+
+static crm_exit_t
+test_exit(crm_exit_t exit_code)
+{
+ lrmd_api_delete(lrmd_conn);
+ return crm_exit(exit_code);
+}
+
+// Wrapped in do/while so these behave as single statements (no dangling else)
+#define print_result(fmt, args...) do {         \
+        if (!options.quiet) {                   \
+            printf(fmt "\n" , ##args);          \
+        }                                       \
+    } while (0)
+
+#define report_event(event) do {                                        \
+        snprintf(event_buf_v0, sizeof(event_buf_v0),                    \
+                 "NEW_EVENT event_type:%s rsc_id:%s action:%s "         \
+                 "rc:%s op_status:%s",                                  \
+                 lrmd_event_type2str((event)->type),                    \
+                 (event)->rsc_id,                                       \
+                 ((event)->op_type? (event)->op_type : "none"),         \
+                 services_ocf_exitcode_str((event)->rc),                \
+                 pcmk_exec_status_str((event)->op_status));             \
+        crm_info("%s", event_buf_v0);                                   \
+    } while (0)
+
+static void
+test_shutdown(int nsig)
+{
+ lrmd_api_delete(lrmd_conn);
+ lrmd_conn = NULL;
+}
+
+static void
+read_events(lrmd_event_data_t * event)
+{
+ report_event(event);
+ if (options.listen) {
+ if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) {
+ print_result("LISTEN EVENT SUCCESSFUL");
+ test_exit(CRM_EX_OK);
+ }
+ }
+
+ if (exec_call_id && (event->call_id == exec_call_id)) {
+ if (event->op_status == 0 && event->rc == 0) {
+ print_result("API-CALL SUCCESSFUL for 'exec'");
+ } else {
+ print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s",
+ event->rc, pcmk_exec_status_str(event->op_status));
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (!options.listen) {
+ test_exit(CRM_EX_OK);
+ }
+ }
+}
+
+static gboolean
+timeout_err(gpointer data)
+{
+ print_result("LISTEN EVENT FAILURE - timeout occurred, never found");
+ test_exit(CRM_EX_TIMEOUT);
+ return FALSE;
+}
+
+static void
+connection_events(lrmd_event_data_t * event)
+{
+ int rc = event->connection_rc;
+
+ if (event->type != lrmd_event_connect) {
+ /* ignore */
+ return;
+ }
+
+ if (!rc) {
+ crm_info("Executor client connection established");
+ start_test(NULL);
+ return;
+    } else {
+        crm_notice("Executor client connection failed");
+        sleep(1);
+        try_connect();
+    }
+}
+
+static void
+try_connect(void)
+{
+ int tries = 10;
+ static int num_tries = 0;
+ int rc = 0;
+
+ lrmd_conn->cmds->set_callback(lrmd_conn, connection_events);
+ for (; num_tries < tries; num_tries++) {
+ rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000);
+
+ if (!rc) {
+ return; /* we'll hear back in async callback */
+ }
+ sleep(1);
+ }
+
+ print_result("API CONNECTION FAILURE");
+ test_exit(CRM_EX_ERROR);
+}
+
+static gboolean
+start_test(gpointer user_data)
+{
+ int rc = 0;
+
+ if (!options.no_connect) {
+ if (!lrmd_conn->cmds->is_connected(lrmd_conn)) {
+ try_connect();
+ /* async connect -- this function will get called back into */
+ return 0;
+ }
+ }
+ lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
+
+ if (options.timeout) {
+ g_timeout_add(options.timeout, timeout_err, NULL);
+ }
+
+ if (!options.api_call) {
+ return 0;
+ }
+
+ if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->exec(lrmd_conn,
+ options.rsc_id,
+ options.action,
+ NULL,
+ options.interval_ms,
+ options.timeout,
+ options.start_delay,
+ options.exec_call_opts,
+ options.params);
+
+ if (rc > 0) {
+ exec_call_id = rc;
+ print_result("API-CALL 'exec' action pending, waiting on response");
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
+ options.rsc_id,
+ options.class, options.provider, options.type, 0);
+ } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) {
+ lrmd_rsc_info_t *rsc_info;
+
+ rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
+
+ if (rsc_info) {
+ print_result("RSC_INFO: id:%s class:%s provider:%s type:%s",
+ rsc_info->id, rsc_info->standard,
+ (rsc_info->provider? rsc_info->provider : "<none>"),
+ rsc_info->type);
+ lrmd_free_rsc_info(rsc_info);
+ rc = pcmk_ok;
+ } else {
+ rc = -1;
+ }
+ } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0);
+ } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action,
+ options.interval_ms);
+ } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) {
+ char *output = NULL;
+
+ rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
+ options.class,
+ options.provider, options.type, &output, 0);
+ if (rc == pcmk_ok) {
+ print_result("%s", output);
+ free(output);
+ }
+ } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
+
+ if (rc > 0) {
+ print_result("%d agents found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no agents found");
+ rc = -1;
+ }
+ } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
+
+ if (rc > 0) {
+ print_result("%d providers found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no providers found");
+ rc = -1;
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list);
+
+ if (rc > 0) {
+ print_result("%d standards found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no providers found");
+ rc = -1;
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) {
+ GList *op_list = NULL;
+ GList *op_item = NULL;
+ rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0,
+ &op_list);
+
+ for (op_item = op_list; op_item != NULL; op_item = op_item->next) {
+ lrmd_op_info_t *op_info = op_item->data;
+
+ print_result("RECURRING_OP: %s_%s_%s timeout=%sms",
+ op_info->rsc_id, op_info->action,
+ op_info->interval_ms_s, op_info->timeout_ms_s);
+ lrmd_free_op_info(op_info);
+ }
+ g_list_free(op_list);
+
+ } else if (options.api_call) {
+ print_result("API-CALL FAILURE unknown action '%s'", options.action);
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (rc < 0) {
+ print_result("API-CALL FAILURE for '%s' api_rc:%d",
+ options.api_call, rc);
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (options.api_call && rc == pcmk_ok) {
+ print_result("API-CALL SUCCESSFUL for '%s'", options.api_call);
+ if (!options.listen) {
+ test_exit(CRM_EX_OK);
+ }
+ }
+
+ if (options.no_wait) {
+ /* just make the call and exit regardless of anything else. */
+ test_exit(CRM_EX_OK);
+ }
+
+ return 0;
+}
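+
+/* Example invocation (illustrative): run a one-shot monitor through the
+ * executor and wait for the result, using the options defined above:
+ *   cts-exec-helper -c exec -r my-rsc -a monitor -t 30000
+ */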
+
+/*!
+ * \internal
+ * \brief Generate resource parameters from CIB if none explicitly given
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+generate_params(void)
+{
+ int rc = pcmk_rc_ok;
+ pe_working_set_t *data_set = NULL;
+ xmlNode *cib_xml_copy = NULL;
+ pe_resource_t *rsc = NULL;
+ GHashTable *params = NULL;
+ GHashTable *meta = NULL;
+ GHashTableIter iter;
+ char *key = NULL;
+ char *value = NULL;
+
+ if (options.params != NULL) {
+ return pcmk_rc_ok; // User specified parameters explicitly
+ }
+
+ // Retrieve and update CIB
+ rc = cib__signon_query(NULL, NULL, &cib_xml_copy);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+ if (!cli_config_update(&cib_xml_copy, NULL, FALSE)) {
+ crm_err("Could not update CIB");
+ return pcmk_rc_cib_corrupt;
+ }
+
+ // Calculate cluster status
+ data_set = pe_new_working_set();
+ if (data_set == NULL) {
+ crm_crit("Could not allocate working set");
+ return ENOMEM;
+ }
+ pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat);
+ data_set->input = cib_xml_copy;
+ data_set->now = crm_time_new(NULL);
+ cluster_status(data_set);
+
+ // Find resource in CIB
+ rsc = pe_find_resource_with_flags(data_set->resources, options.rsc_id,
+ pe_find_renamed|pe_find_any);
+ if (rsc == NULL) {
+ crm_err("Resource does not exist in config");
+ pe_free_working_set(data_set);
+ return EINVAL;
+ }
+
+ // Add resource instance parameters to options.params
+ params = pe_rsc_params(rsc, NULL, data_set);
+ if (params != NULL) {
+ g_hash_table_iter_init(&iter, params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ options.params = lrmd_key_value_add(options.params, key, value);
+ }
+ }
+
+ // Add resource meta-attributes to options.params
+ meta = pcmk__strkey_table(free, free);
+ get_meta_attributes(meta, rsc, NULL, data_set);
+ g_hash_table_iter_init(&iter, meta);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ char *crm_name = crm_meta_name(key);
+
+ options.params = lrmd_key_value_add(options.params, crm_name, value);
+ free(crm_name);
+ }
+ g_hash_table_destroy(meta);
+
+ pe_free_working_set(data_set);
+ return rc;
+}
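+
+/* Note: with --is-running, main() below funnels through generate_params() and
+ * then issues a one-shot "exec" of the monitor action, so the parameters
+ * assembled here approximate what the cluster itself would pass to the agent.
+ */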
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, NULL, group, NULL);
+
+ pcmk__add_main_args(context, basic_entries);
+ pcmk__add_arg_group(context, "api-call", "API Call Options:",
+ "Parameters for api-call option", api_call_entries);
+
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ GError *error = NULL;
+ crm_exit_t exit_code = CRM_EX_OK;
+ crm_trigger_t *trig = NULL;
+
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ /* Typically we'd pass all the single character options that take an argument
+ * as the second parameter here (and there's a bunch of those in this tool).
+ * However, we control how this program is called so we can just not call it
+ * in a way where the preprocessing ever matters.
+ */
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, NULL);
+
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ /* We have to use crm_log_init here to set up the logging because there's
+ * different handling for daemons vs. command line programs, and
+ * pcmk__cli_init_logging is set up to only handle the latter.
+ */
+ crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc,
+ argv, FALSE);
+
+ for (int i = 0; i < args->verbosity; i++) {
+ crm_bump_log_level(argc, argv);
+ }
+
+ if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents",
+ "list_standards", "list_ocf_providers", NULL)) {
+ options.no_connect = TRUE;
+ }
+
+ if (options.is_running) {
+ int rc = pcmk_rc_ok;
+
+ if (options.rsc_id == NULL) {
+ exit_code = CRM_EX_USAGE;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "--is-running requires --rsc-id");
+ goto done;
+ }
+
+ options.interval_ms = 0;
+ if (options.timeout == 0) {
+ options.timeout = 30000;
+ }
+
+ rc = generate_params();
+ if (rc != pcmk_rc_ok) {
+ exit_code = pcmk_rc2exitc(rc);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+                        "Cannot determine resource status: "
+ "unable to get parameters from CIB");
+ goto done;
+ }
+ options.api_call = "exec";
+ options.action = "monitor";
+ options.exec_call_opts = lrmd_opt_notify_orig_only;
+ }
+
+ if (!options.api_call && !options.listen) {
+ exit_code = CRM_EX_USAGE;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Must specify at least one of --api-call, --listen, "
+ "or --is-running");
+ goto done;
+ }
+
+ if (options.use_tls) {
+ lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0);
+ } else {
+ lrmd_conn = lrmd_api_new();
+ }
+ trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
+ mainloop_set_trigger(trig);
+ mainloop_add_signal(SIGTERM, test_shutdown);
+
+ crm_info("Starting");
+ mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(mainloop);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ free(key);
+ free(val);
+
+ pcmk__output_and_clear_error(&error, NULL);
+ return test_exit(exit_code);
+}
diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c
new file mode 100644
index 0000000..5944d93
--- /dev/null
+++ b/daemons/execd/execd_alerts.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2016-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/services.h>
+#include <crm/services_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/alerts_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-execd.h"
+
+/* Track in-flight alerts so we can wait for them at shutdown */
+static GHashTable *inflight_alerts; /* key = call_id, value = timeout */
+static gboolean draining_alerts = FALSE;
+
+static inline void
+add_inflight_alert(int call_id, int timeout)
+{
+ if (inflight_alerts == NULL) {
+ inflight_alerts = pcmk__intkey_table(NULL);
+ }
+ pcmk__intkey_table_insert(inflight_alerts, call_id,
+ GINT_TO_POINTER(timeout));
+}
+
+static inline void
+remove_inflight_alert(int call_id)
+{
+ if (inflight_alerts != NULL) {
+ pcmk__intkey_table_remove(inflight_alerts, call_id);
+ }
+}
+
+static int
+max_inflight_timeout(void)
+{
+ GHashTableIter iter;
+ gpointer timeout;
+ int max_timeout = 0;
+
+ if (inflight_alerts) {
+ g_hash_table_iter_init(&iter, inflight_alerts);
+ while (g_hash_table_iter_next(&iter, NULL, &timeout)) {
+ if (GPOINTER_TO_INT(timeout) > max_timeout) {
+ max_timeout = GPOINTER_TO_INT(timeout);
+ }
+ }
+ }
+ return max_timeout;
+}
+
+struct alert_cb_s {
+ char *client_id;
+ int call_id;
+};
+
+static void
+alert_complete(svc_action_t *action)
+{
+ struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data);
+
+ CRM_CHECK(cb_data != NULL, return);
+
+ remove_inflight_alert(cb_data->call_id);
+
+ if (action->status != PCMK_EXEC_DONE) {
+ const char *reason = services__exit_reason(action);
+
+ crm_notice("Could not send alert: %s%s%s%s " CRM_XS " client=%s",
+ pcmk_exec_status_str(action->status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")",
+ cb_data->client_id);
+
+ } else if (action->rc != 0) {
+ crm_notice("Alert [%d] completed but exited with status %d "
+ CRM_XS " client=%s",
+ action->pid, action->rc, cb_data->client_id);
+
+ } else {
+ crm_debug("Alert [%d] completed " CRM_XS " client=%s",
+ action->pid, cb_data->client_id);
+ }
+
+ free(cb_data->client_id);
+ free(action->cb_data);
+ action->cb_data = NULL;
+}
+
+int
+process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ static int alert_sequence_no = 0;
+
+ xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR);
+ const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID);
+ const char *alert_path = crm_element_value(alert_xml, F_LRMD_ALERT_PATH);
+ svc_action_t *action = NULL;
+ int alert_timeout = 0;
+ int rc = pcmk_ok;
+ GHashTable *params = NULL;
+ struct alert_cb_s *cb_data = NULL;
+
+ if ((alert_id == NULL) || (alert_path == NULL) ||
+        (client == NULL) || (client->id == NULL)) { /* hint for static analyzer */
+ return -EINVAL;
+ }
+ if (draining_alerts) {
+ return pcmk_ok;
+ }
+
+ crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &alert_timeout);
+
+ crm_info("Executing alert %s for %s", alert_id, client->id);
+
+ params = xml2list(alert_xml);
+ pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence,
+ ++alert_sequence_no);
+
+ cb_data = calloc(1, sizeof(struct alert_cb_s));
+ if (cb_data == NULL) {
+ rc = -errno;
+ goto err;
+ }
+
+ /* coverity[deref_ptr] False Positive */
+ cb_data->client_id = strdup(client->id);
+ if (cb_data->client_id == NULL) {
+ rc = -errno;
+ goto err;
+ }
+
+ crm_element_value_int(request, F_LRMD_CALLID, &(cb_data->call_id));
+
+ action = services_alert_create(alert_id, alert_path, alert_timeout, params,
+ alert_sequence_no, cb_data);
+ if (action->rc != PCMK_OCF_UNKNOWN) {
+ rc = -E2BIG;
+ goto err;
+ }
+
+ rc = services_action_user(action, CRM_DAEMON_USER);
+ if (rc < 0) {
+ goto err;
+ }
+
+ add_inflight_alert(cb_data->call_id, alert_timeout);
+ if (services_alert_async(action, alert_complete) == FALSE) {
+ services_action_free(action);
+ }
+ return pcmk_ok;
+
+err:
+ if (cb_data) {
+ if (cb_data->client_id) {
+ free(cb_data->client_id);
+ }
+ free(cb_data);
+ }
+ services_action_free(action);
+ return rc;
+}
+
+static bool
+drain_check(guint remaining_timeout_ms)
+{
+ if (inflight_alerts != NULL) {
+ guint count = g_hash_table_size(inflight_alerts);
+
+ if (count > 0) {
+ crm_trace("%d alerts pending (%.3fs timeout remaining)",
+ count, remaining_timeout_ms / 1000.0);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
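+/*!
+ * \internal
+ * \brief Stop accepting new alerts and wait for in-flight ones to finish
+ *
+ * Drain \p mloop until all in-flight alerts complete, or until the longest
+ * in-flight alert timeout plus a 5-second grace period elapses.
+ *
+ * \param[in,out] mloop  Main loop to drain while waiting
+ */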
+void
+lrmd_drain_alerts(GMainLoop *mloop)
+{
+ if (inflight_alerts != NULL) {
+ guint timer_ms = max_inflight_timeout() + 5000;
+
+ crm_trace("Draining in-flight alerts (timeout %.3fs)",
+ timer_ms / 1000.0);
+ draining_alerts = TRUE;
+ pcmk_drain_main_loop(mloop, timer_ms, drain_check);
+ g_hash_table_destroy(inflight_alerts);
+ inflight_alerts = NULL;
+ }
+}
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
new file mode 100644
index 0000000..fa2761e
--- /dev/null
+++ b/daemons/execd/execd_commands.c
@@ -0,0 +1,1927 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/fencing/internal.h>
+
+#include <glib.h>
+
+// Check whether we have a high-resolution monotonic clock
+#undef PCMK__TIME_USE_CGT
+#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
+# define PCMK__TIME_USE_CGT
+# include <time.h> /* clock_gettime */
+#endif
+
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/fencing/internal.h>
+#include <crm/services.h>
+#include <crm/services_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-execd.h"
+
+GHashTable *rsc_list = NULL;
+
+typedef struct lrmd_cmd_s {
+ int timeout;
+ guint interval_ms;
+ int start_delay;
+ int timeout_orig;
+
+ int call_id;
+
+ int call_opts;
+ /* Timer IDs; these must be removed when the command is destroyed */
+ int delay_id;
+ int stonith_recurring_id;
+
+ int rsc_deleted;
+
+ int service_flags;
+
+ char *client_id;
+ char *origin;
+ char *rsc_id;
+ char *action;
+ char *real_action;
+ char *userdata_str;
+
+ pcmk__action_result_t result;
+
+ /* We can track operation queue time and run time, to be saved with the CIB
+ * resource history (and displayed in cluster status). We need
+ * high-resolution monotonic time for this purpose, so we use
+ * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
+ * is disabled).
+ *
+ * However, we also need epoch timestamps for recording the time the command
+ * last ran and the time its return value last changed, for use in time
+ * displays (as opposed to interval calculations). We keep time_t values for
+ * this purpose.
+ *
+ * The last run time is used for both purposes, so we keep redundant
+ * monotonic and epoch values for this. Technically the two could represent
+ * different times, but since time_t has only second resolution and the
+ * values are used for distinct purposes, that is not significant.
+ */
+#ifdef PCMK__TIME_USE_CGT
+ /* Recurring and systemd operations may involve more than one executor
+ * command per operation, so they need info about the original and the most
+ * recent.
+ */
+ struct timespec t_first_run; // When op first ran
+ struct timespec t_run; // When op most recently ran
+ struct timespec t_first_queue; // When op was first queued
+ struct timespec t_queue; // When op was most recently queued
+#endif
+ time_t epoch_last_run; // Epoch timestamp of when op last ran
+ time_t epoch_rcchange; // Epoch timestamp of when rc last changed
+
+ bool first_notify_sent;
+ int last_notify_rc;
+ int last_notify_op_status;
+ int last_pid;
+
+ GHashTable *params;
+} lrmd_cmd_t;
+
+static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
+static gboolean execute_resource_action(gpointer user_data);
+static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
+
+#ifdef PCMK__TIME_USE_CGT
+
+/*!
+ * \internal
+ * \brief Check whether a struct timespec has been set
+ *
+ * \param[in] timespec Time to check
+ *
+ * \return true if timespec has been set (i.e. is nonzero), false otherwise
+ */
+static inline bool
+time_is_set(const struct timespec *timespec)
+{
+ return (timespec != NULL) &&
+ ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
+}
+
+/*!
+ * \internal
+ * \brief Set a timespec (and its original if unset) to the current time
+ *
+ * \param[out] t_current Where to store current time
+ * \param[out] t_orig Where to copy t_current if unset
+ */
+static void
+get_current_time(struct timespec *t_current, struct timespec *t_orig)
+{
+ clock_gettime(CLOCK_MONOTONIC, t_current);
+ if ((t_orig != NULL) && !time_is_set(t_orig)) {
+ *t_orig = *t_current;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return difference between two times in milliseconds
+ *
+ * \param[in] now More recent time (or NULL to use current time)
+ * \param[in] old Earlier time
+ *
+ * \return milliseconds difference (or 0 if old is NULL or unset)
+ *
+ * \note Can overflow on 32-bit machines when the difference is around
+ * 24 days or more.
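+ *
+ * As a hypothetical example: old = {8, 250000000} and
+ * now = {10, 500000000} yield (10 - 8) * 1000
+ * + (500000000 - 250000000) / 1000000 = 2250ms.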
+ */
+static int
+time_diff_ms(const struct timespec *now, const struct timespec *old)
+{
+ int diff_ms = 0;
+
+ if (time_is_set(old)) {
+ struct timespec local_now = { 0, };
+
+ if (now == NULL) {
+ clock_gettime(CLOCK_MONOTONIC, &local_now);
+ now = &local_now;
+ }
+ diff_ms = (now->tv_sec - old->tv_sec) * 1000
+ + (now->tv_nsec - old->tv_nsec) / 1000000;
+ }
+ return diff_ms;
+}
+
+/*!
+ * \internal
+ * \brief Reset a command's operation times to their original values.
+ *
+ * Reset a command's run and queued timestamps to the timestamps of the original
+ * command, so we report the entire time since then and not just the time since
+ * the most recent command (for recurring and systemd operations).
+ *
+ * \param[in,out] cmd Executor command object to reset
+ *
+ * \note It's not obvious what the queued time should be for a systemd
+ * start/stop operation, which might go like this:
+ * initial command queued 5ms, runs 3s
+ * monitor command queued 10ms, runs 10s
+ * monitor command queued 10ms, runs 10s
+ * Is the queued time for that operation 5ms, 10ms or 25ms? The current
+ * implementation will report 5ms. If it's 25ms, then we need to
+ * subtract 20ms from the total exec time so as not to count it twice.
+ * We can implement that later if it matters to anyone ...
+ */
+static void
+cmd_original_times(lrmd_cmd_t * cmd)
+{
+ cmd->t_run = cmd->t_first_run;
+ cmd->t_queue = cmd->t_first_queue;
+}
+#endif
+
+static inline bool
+action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
+{
+ return (cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an asynchronous command
+ *
+ * \param[in] cmd Command to log result for
+ * \param[in] exec_time_ms Execution time in milliseconds, if known
+ * \param[in] queue_time_ms Queue time in milliseconds, if known
+ */
+static void
+log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
+{
+ int log_level = LOG_INFO;
+ GString *str = g_string_sized_new(100); // reasonable starting size
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ log_level = LOG_DEBUG;
+ }
+
+ g_string_append_printf(str, "%s %s (call %d",
+ cmd->rsc_id, cmd->action, cmd->call_id);
+ if (cmd->last_pid != 0) {
+ g_string_append_printf(str, ", PID %d", cmd->last_pid);
+ }
+ if (cmd->result.execution_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(str, ") exited with status %d",
+ cmd->result.exit_status);
+ } else {
+ pcmk__g_strcat(str, ") could not be executed: ",
+ pcmk_exec_status_str(cmd->result.execution_status),
+ NULL);
+ }
+ if (cmd->result.exit_reason != NULL) {
+ pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
+ }
+
+#ifdef PCMK__TIME_USE_CGT
+ pcmk__g_strcat(str, " (execution time ",
+ pcmk__readable_interval(exec_time_ms), NULL);
+ if (queue_time_ms > 0) {
+ pcmk__g_strcat(str, " after being queued ",
+ pcmk__readable_interval(queue_time_ms), NULL);
+ }
+ g_string_append_c(str, ')');
+#endif
+
+ do_crm_log(log_level, "%s", str->str);
+ g_string_free(str, TRUE);
+}
+
+static void
+log_execute(lrmd_cmd_t * cmd)
+{
+ int log_level = LOG_INFO;
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ log_level = LOG_DEBUG;
+ }
+
+ do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
+ cmd->rsc_id, cmd->action, cmd->call_id);
+}
+
+static const char *
+normalize_action_name(lrmd_rsc_t * rsc, const char *action)
+{
+ if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
+ pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
+ return "status";
+ }
+ return action;
+}
+
+static lrmd_rsc_t *
+build_rsc_from_xml(xmlNode * msg)
+{
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
+ lrmd_rsc_t *rsc = NULL;
+
+ rsc = calloc(1, sizeof(lrmd_rsc_t));
+
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
+
+ rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
+ rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
+ rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
+ rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
+ rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
+ rsc);
+
+ // Initialize fence device probes (to return "not running")
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ return rsc;
+}
+
+static lrmd_cmd_t *
+create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
+{
+ int call_options = 0;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
+ lrmd_cmd_t *cmd = NULL;
+
+ cmd = calloc(1, sizeof(lrmd_cmd_t));
+
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
+ cmd->call_opts = call_options;
+ cmd->client_id = strdup(client->id);
+
+ crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
+ crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
+ crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
+ crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
+ cmd->timeout_orig = cmd->timeout;
+
+ cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
+ cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
+ cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
+ cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
+
+ cmd->params = xml2list(rsc_xml);
+
+ if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
+ crm_debug("Setting flag to leave pid group on timeout and "
+ "only kill action pid for " PCMK__OP_FMT,
+ cmd->rsc_id, cmd->action, cmd->interval_ms);
+ cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
+ LOG_TRACE, "Action",
+ cmd->action, 0,
+ SVC_ACTION_LEAVE_GROUP,
+ "SVC_ACTION_LEAVE_GROUP");
+ }
+ return cmd;
+}
+
+static void
+stop_recurring_timer(lrmd_cmd_t *cmd)
+{
+ if (cmd) {
+ if (cmd->stonith_recurring_id) {
+ g_source_remove(cmd->stonith_recurring_id);
+ }
+ cmd->stonith_recurring_id = 0;
+ }
+}
+
+static void
+free_lrmd_cmd(lrmd_cmd_t * cmd)
+{
+ stop_recurring_timer(cmd);
+ if (cmd->delay_id) {
+ g_source_remove(cmd->delay_id);
+ }
+ if (cmd->params) {
+ g_hash_table_destroy(cmd->params);
+ }
+ pcmk__reset_result(&(cmd->result));
+ free(cmd->origin);
+ free(cmd->action);
+ free(cmd->real_action);
+ free(cmd->userdata_str);
+ free(cmd->rsc_id);
+ free(cmd->client_id);
+ free(cmd);
+}
+
+static gboolean
+stonith_recurring_op_helper(gpointer data)
+{
+ lrmd_cmd_t *cmd = data;
+ lrmd_rsc_t *rsc;
+
+ cmd->stonith_recurring_id = 0;
+
+ if (!cmd->rsc_id) {
+ return FALSE;
+ }
+
+ rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+
+ CRM_ASSERT(rsc != NULL);
+ /* take it out of recurring_ops list, and put it in the pending ops
+ * to be executed */
+ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
+ rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
+#endif
+ mainloop_set_trigger(rsc->work);
+
+ return FALSE;
+}
+
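+/*!
+ * \internal
+ * \brief Schedule the next iteration of a recurring stonith operation
+ *
+ * The service library does not manage recurring stonith operations, so the
+ * executor drives them itself: when the interval timer fires,
+ * stonith_recurring_op_helper() moves the command from the resource's
+ * recurring_ops list back onto pending_ops and triggers execution.
+ */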
+static inline void
+start_recurring_timer(lrmd_cmd_t *cmd)
+{
+ if (cmd && (cmd->interval_ms > 0)) {
+ cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
+ stonith_recurring_op_helper,
+ cmd);
+ }
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+ lrmd_cmd_t *cmd = data;
+ lrmd_rsc_t *rsc = NULL;
+
+ cmd->delay_id = 0;
+ rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
+
+ if (rsc) {
+ mainloop_set_trigger(rsc->work);
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a list already contains the equivalent of a given action
+ *
+ * \param[in] action_list List to search
+ * \param[in] cmd Action to search for
+ */
+static lrmd_cmd_t *
+find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
+{
+ for (const GList *item = action_list; item != NULL; item = item->next) {
+ lrmd_cmd_t *dup = item->data;
+
+ if (action_matches(cmd, dup->action, dup->interval_ms)) {
+ return dup;
+ }
+ }
+ return NULL;
+}
+
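+/*!
+ * \internal
+ * \brief Merge a duplicate recurring command into the existing one
+ *
+ * If an equivalent recurring operation is already scheduled or executing,
+ * absorb the new command's call ID and user data into the existing entry
+ * and free the new command.
+ *
+ * \param[in,out] rsc  Resource the command applies to
+ * \param[in,out] cmd  New command (freed if merged)
+ *
+ * \return true if \p cmd was merged (and freed), false otherwise
+ */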
+static bool
+merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+{
+ lrmd_cmd_t * dup = NULL;
+ bool dup_pending = true;
+
+ if (cmd->interval_ms == 0) {
+ return false;
+ }
+
+ // Search for a duplicate of this action (in-flight or not)
+ dup = find_duplicate_action(rsc->pending_ops, cmd);
+ if (dup == NULL) {
+ dup_pending = false;
+ dup = find_duplicate_action(rsc->recurring_ops, cmd);
+ if (dup == NULL) {
+ return false;
+ }
+ }
+
+ /* Do not merge fencing monitors marked for cancellation, so we can reply to
+ * the cancellation separately.
+ */
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei)
+ && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
+ return false;
+ }
+
+ /* This should not occur. If it does, we need to investigate how something
+ * like this is possible in the controller.
+ */
+ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
+ "), merging with previous op entry",
+ rsc->rsc_id, normalize_action_name(rsc, dup->action),
+ dup->interval_ms);
+
+ // Merge new action's call ID and user data into existing action
+ dup->first_notify_sent = false;
+ free(dup->userdata_str);
+ dup->userdata_str = cmd->userdata_str;
+ cmd->userdata_str = NULL;
+ dup->call_id = cmd->call_id;
+ free_lrmd_cmd(cmd);
+ cmd = NULL;
+
+ /* If dup is not pending, that means it has already executed at least once
+ * and is waiting in the interval. In that case, stop waiting and initiate
+ * a new instance now.
+ */
+ if (!dup_pending) {
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei)) {
+ stop_recurring_timer(dup);
+ stonith_recurring_op_helper(dup);
+ } else {
+ services_action_kick(rsc->rsc_id,
+ normalize_action_name(rsc, dup->action),
+ dup->interval_ms);
+ }
+ }
+ return true;
+}
+
+static void
+schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+{
+ CRM_CHECK(cmd != NULL, return);
+ CRM_CHECK(rsc != NULL, return);
+
+ crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
+
+ if (merge_recurring_duplicate(rsc, cmd)) {
+ // Equivalent of cmd has already been scheduled
+ return;
+ }
+
+ /* The controller expects the executor to automatically cancel
+ * recurring operations before a resource stops.
+ */
+ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ cancel_all_recurring(rsc, NULL);
+ }
+
+ rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
+#endif
+ mainloop_set_trigger(rsc->work);
+
+ if (cmd->start_delay) {
+ cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+ }
+}
+
+static xmlNode *
+create_lrmd_reply(const char *origin, int rc, int call_id)
+{
+ xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
+
+ crm_xml_add(reply, F_LRMD_ORIGIN, origin);
+ crm_xml_add_int(reply, F_LRMD_RC, rc);
+ crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
+ return reply;
+}
+
+static void
+send_client_notify(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *update_msg = user_data;
+ pcmk__client_t *client = value;
+ int rc;
+ int log_level = LOG_WARNING;
+ const char *msg = NULL;
+
+ CRM_CHECK(client != NULL, return);
+ if (client->name == NULL) {
+ crm_trace("Skipping notification to client without name");
+ return;
+ }
+ if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
+ /* We only want to notify clients of the executor IPC API. If we are
+ * running as Pacemaker Remote, we may have clients proxied to other
+ * IPC services in the cluster, so skip those.
+ */
+ crm_trace("Skipping executor API notification to client %s",
+ pcmk__client_name(client));
+ return;
+ }
+
+ rc = lrmd_server_send_notify(client, update_msg);
+ if (rc == pcmk_rc_ok) {
+ return;
+ }
+
+ switch (rc) {
+ case ENOTCONN:
+ case EPIPE: // Client exited without waiting for notification
+ log_level = LOG_INFO;
+ msg = "Disconnected";
+ break;
+
+ default:
+ msg = pcmk_rc_str(rc);
+ break;
+ }
+ do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
+ pcmk__client_name(client), msg, rc);
+}
+
+static void
+send_cmd_complete_notify(lrmd_cmd_t * cmd)
+{
+ xmlNode *notify = NULL;
+ int exec_time = 0;
+ int queue_time = 0;
+
+#ifdef PCMK__TIME_USE_CGT
+ exec_time = time_diff_ms(NULL, &(cmd->t_run));
+ queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
+#endif
+ log_finished(cmd, exec_time, queue_time);
+
+ /* If the originator requested to be notified only for changes in recurring
+ * operation results, skip the notification if the result hasn't changed.
+ */
+ if (cmd->first_notify_sent
+ && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
+ && (cmd->last_notify_rc == cmd->result.exit_status)
+ && (cmd->last_notify_op_status == cmd->result.execution_status)) {
+ return;
+ }
+
+ cmd->first_notify_sent = true;
+ cmd->last_notify_rc = cmd->result.exit_status;
+ cmd->last_notify_op_status = cmd->result.execution_status;
+
+ notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+
+ crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
+ crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
+ crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
+ crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
+ crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
+ crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
+ crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
+
+ crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
+ (long long) cmd->epoch_last_run);
+ crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
+ (long long) cmd->epoch_rcchange);
+#ifdef PCMK__TIME_USE_CGT
+ crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
+ crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
+#endif
+
+ crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
+ crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
+ if (cmd->real_action) {
+ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
+ } else {
+ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
+ }
+ crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
+ crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
+
+ if (cmd->result.action_stderr != NULL) {
+ crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
+
+ } else if (cmd->result.action_stdout != NULL) {
+ crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
+ }
+
+ if (cmd->params) {
+ char *key = NULL;
+ char *value = NULL;
+ GHashTableIter iter;
+
+ xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
+
+ g_hash_table_iter_init(&iter, cmd->params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ hash2smartfield((gpointer) key, (gpointer) value, args);
+ }
+ }
+ if ((cmd->client_id != NULL)
+ && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
+
+ pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
+
+ if (client != NULL) {
+ send_client_notify(client->id, client, notify);
+ }
+ } else {
+ pcmk__foreach_ipc_client(send_client_notify, notify);
+ }
+
+ free_xml(notify);
+}
+
+static void
+send_generic_notify(int rc, xmlNode * request)
+{
+ if (pcmk__ipc_client_count() != 0) {
+ int call_id = 0;
+ xmlNode *notify = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ const char *op = crm_element_value(request, F_LRMD_OPERATION);
+
+ crm_element_value_int(request, F_LRMD_CALLID, &call_id);
+
+ notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+ crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add_int(notify, F_LRMD_RC, rc);
+ crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
+ crm_xml_add(notify, F_LRMD_OPERATION, op);
+ crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
+
+ pcmk__foreach_ipc_client(send_client_notify, notify);
+
+ free_xml(notify);
+ }
+}
+
+static void
+cmd_reset(lrmd_cmd_t * cmd)
+{
+ cmd->last_pid = 0;
+#ifdef PCMK__TIME_USE_CGT
+ memset(&cmd->t_run, 0, sizeof(cmd->t_run));
+ memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
+#endif
+ cmd->epoch_last_run = 0;
+
+ pcmk__reset_result(&(cmd->result));
+ cmd->result.execution_status = PCMK_EXEC_DONE;
+}
+
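+/*!
+ * \internal
+ * \brief Finish processing a command's current iteration
+ *
+ * Send the completion notification to clients, then either free the command
+ * (for one-shot and cancelled operations) or reset its per-iteration state
+ * (for recurring operations that will run again).
+ *
+ * \param[in,out] cmd  Command to finalize (may be freed here)
+ * \param[in,out] rsc  Command's resource (NULL if resource was removed)
+ */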
+static void
+cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
+{
+ crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
+ rsc ? rsc->active : NULL, cmd);
+
+ if (rsc && (rsc->active == cmd)) {
+ rsc->active = NULL;
+ mainloop_set_trigger(rsc->work);
+ }
+
+ if (!rsc) {
+ cmd->rsc_deleted = 1;
+ }
+
+ /* Restore the original timeout so the client notification reports accurate
+ * information */
+ cmd->timeout = cmd->timeout_orig;
+
+ send_cmd_complete_notify(cmd);
+
+ if ((cmd->interval_ms != 0)
+ && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
+
+ if (rsc) {
+ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
+ rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
+ }
+ free_lrmd_cmd(cmd);
+ } else if (cmd->interval_ms == 0) {
+ if (rsc) {
+ rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
+ }
+ free_lrmd_cmd(cmd);
+ } else {
+ /* Clear all the values pertaining just to the last iteration of a recurring op. */
+ cmd_reset(cmd);
+ }
+}
+
+struct notify_new_client_data {
+ xmlNode *notify;
+ pcmk__client_t *new_client;
+};
+
+static void
+notify_one_client(gpointer key, gpointer value, gpointer user_data)
+{
+ pcmk__client_t *client = value;
+ struct notify_new_client_data *data = user_data;
+
+ if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
+ send_client_notify(key, (gpointer) client, (gpointer) data->notify);
+ }
+}
+
+void
+notify_of_new_client(pcmk__client_t *new_client)
+{
+ struct notify_new_client_data data;
+
+ data.new_client = new_client;
+ data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+ crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
+ pcmk__foreach_ipc_client(notify_one_client, &data);
+ free_xml(data.notify);
+}
+
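+/*!
+ * \internal
+ * \brief Clean up after a client disconnects
+ *
+ * For each registered resource whose commands were scheduled with the
+ * lrmd_opt_drop_recurring call option, cancel any recurring operations
+ * initiated by the disconnecting client.
+ *
+ * \param[in] client_id  ID of client that disconnected
+ */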
+void
+client_disconnect_cleanup(const char *client_id)
+{
+ GHashTableIter iter;
+ lrmd_rsc_t *rsc = NULL;
+ char *key = NULL;
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
+ if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
+ /* This client is disconnecting; drop any recurring operations
+ * it may have initiated on the resource */
+ cancel_all_recurring(rsc, client_id);
+ }
+ }
+}
+
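+/*!
+ * \internal
+ * \brief Process the result of a completed (non-stonith) action
+ *
+ * This is the callback given to the services library. It maps the agent's
+ * result to an OCF exit code, applies class-specific adjustments (for
+ * example, the systemd follow-up monitors), and finalizes the command.
+ *
+ * \param[in,out] action  Completed action
+ */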
+static void
+action_complete(svc_action_t * action)
+{
+ lrmd_rsc_t *rsc;
+ lrmd_cmd_t *cmd = action->cb_data;
+ enum ocf_exitcode code;
+
+#ifdef PCMK__TIME_USE_CGT
+ const char *rclass = NULL;
+ bool goagain = false;
+#endif
+
+ if (!cmd) {
+ crm_err("Completed executor action (%s) does not match any known operations",
+ action->id);
+ return;
+ }
+
+#ifdef PCMK__TIME_USE_CGT
+ if (cmd->result.exit_status != action->rc) {
+ cmd->epoch_rcchange = time(NULL);
+ }
+#endif
+
+ cmd->last_pid = action->pid;
+
+ // Cast variable instead of function return to keep compilers happy
+ code = services_result2ocf(action->standard, cmd->action, action->rc);
+ pcmk__set_result(&(cmd->result), (int) code,
+ action->status, services__exit_reason(action));
+
+ rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
+
+#ifdef PCMK__TIME_USE_CGT
+ if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
+ rclass = resources_find_service_class(rsc->type);
+ } else if(rsc) {
+ rclass = rsc->class;
+ }
+
+ if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
+ if (pcmk__result_ok(&(cmd->result))
+ && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
+ /* systemd returns from start and stop actions after the action
+ * begins, not after it completes. We have to jump through a few
+ * hoops so that we don't report 'complete' to the rest of pacemaker
+ * until it's actually done.
+ */
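+ /* The trick: remember the real action in cmd->real_action, rerun the
+ * command as a "monitor", and keep re-scheduling that monitor (see the
+ * goagain handling below) until the unit reaches its target state.
+ */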
+ goagain = true;
+ cmd->real_action = cmd->action;
+ cmd->action = strdup("monitor");
+
+ } else if (cmd->real_action != NULL) {
+ // This is follow-up monitor to check whether start/stop completed
+ if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
+ goagain = true;
+
+ } else if (pcmk__result_ok(&(cmd->result))
+ && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
+ goagain = true;
+
+ } else {
+ int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
+ int timeout_left = cmd->timeout_orig - time_sum;
+
+ crm_debug("%s systemd %s is now complete (elapsed=%dms, "
+ "remaining=%dms): %s (%d)",
+ cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
+ services_ocf_exitcode_str(cmd->result.exit_status),
+ cmd->result.exit_status);
+ cmd_original_times(cmd);
+
+ // Monitors may return "not running", but start/stop shouldn't
+ if ((cmd->result.execution_status == PCMK_EXEC_DONE)
+ && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
+
+ if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
+ cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
+ } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
+ cmd->result.exit_status = PCMK_OCF_OK;
+ }
+ }
+ }
+ }
+ }
+#endif
+
+#if SUPPORT_NAGIOS
+ if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
+ if (action_matches(cmd, "monitor", 0)
+ && pcmk__result_ok(&(cmd->result))) {
+ /* Successfully executed --version for the nagios plugin */
+ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
+
+ } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
+ && !pcmk__result_ok(&(cmd->result))) {
+#ifdef PCMK__TIME_USE_CGT
+ goagain = true;
+#endif
+ }
+ }
+#endif
+
+#ifdef PCMK__TIME_USE_CGT
+ if (goagain) {
+ int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
+ int timeout_left = cmd->timeout_orig - time_sum;
+ int delay = cmd->timeout_orig / 10;
+
+ if ((delay >= timeout_left) && (timeout_left > 20)) {
+ delay = timeout_left / 2;
+ }
+
+ delay = QB_MIN(2000, delay);
+ if (delay < timeout_left) {
+ cmd->start_delay = delay;
+ cmd->timeout = timeout_left;
+
+ if (pcmk__result_ok(&(cmd->result))) {
+ crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
+
+ } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
+ crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
+
+ } else {
+ crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->action,
+ services_ocf_exitcode_str(cmd->result.exit_status),
+ cmd->result.exit_status, time_sum, timeout_left,
+ delay);
+ }
+
+ cmd_reset(cmd);
+ if (rsc) {
+ rsc->active = NULL;
+ }
+ schedule_lrmd_cmd(rsc, cmd);
+
+ /* Don't finalize cmd, we're not done with it yet */
+ return;
+
+ } else {
+ crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
+ cmd->rsc_id,
+ (cmd->real_action? cmd->real_action : cmd->action),
+ cmd->result.exit_status, time_sum, timeout_left);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Investigate reason for timeout, and adjust "
+ "configured operation timeout if necessary");
+ cmd_original_times(cmd);
+ }
+ }
+#endif
+
+ pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
+ services__grab_stderr(action));
+ cmd_finalize(cmd, rsc);
+}
+
+/*!
+ * \internal
+ * \brief Process the result of a fence device action (start, stop, or monitor)
+ *
+ * \param[in,out] cmd Fence device action that completed
+ * \param[in] exit_status Fencer API exit status for action
+ * \param[in] execution_status Fencer API execution status for action
+ * \param[in] exit_reason Human-friendly detail, if action failed
+ */
+static void
+stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
+ enum pcmk_exec_status execution_status,
+ const char *exit_reason)
+{
+ // This can be NULL if resource was removed before command completed
+ lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+
+ // Simplify fencer exit status to uniform exit status
+ if (exit_status != CRM_EX_OK) {
+ exit_status = PCMK_OCF_UNKNOWN_ERROR;
+ }
+
+ if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
+ /* An in-flight fence action was cancelled. The execution status is
+ * already correct, so don't overwrite it.
+ */
+ execution_status = PCMK_EXEC_CANCELLED;
+
+ } else {
+ /* Some execution status codes have specific meanings for the fencer
+ * that executor clients may not expect, so map them to a simple error
+ * status.
+ */
+ switch (execution_status) {
+ case PCMK_EXEC_NOT_CONNECTED:
+ case PCMK_EXEC_INVALID:
+ execution_status = PCMK_EXEC_ERROR;
+ break;
+
+ case PCMK_EXEC_NO_FENCE_DEVICE:
+ /* This should be possible only for probes in practice, but
+ * interpret for all actions to be safe.
+ */
+ if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS,
+ pcmk__str_none)) {
+ exit_status = PCMK_OCF_NOT_RUNNING;
+
+ } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP,
+ pcmk__str_none)) {
+ exit_status = PCMK_OCF_OK;
+
+ } else {
+ exit_status = PCMK_OCF_NOT_INSTALLED;
+ }
+ execution_status = PCMK_EXEC_ERROR;
+ break;
+
+ case PCMK_EXEC_NOT_SUPPORTED:
+ exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
+
+ // Certain successful actions change the known state of the resource
+ if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
+
+ if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
+ PCMK_EXEC_DONE, NULL); // "running"
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
+ }
+ }
+
+ /* The recurring timer should not be running at this point in any case, but
+ * as a failsafe, stop it if it is.
+ */
+ stop_recurring_timer(cmd);
+
+ /* Reschedule this command if appropriate. If a recurring command is *not*
+ * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
+ * not be removed from recurring_ops by cmd_finalize().
+ */
+ if (rsc && (cmd->interval_ms > 0)
+ && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
+ start_recurring_timer(cmd);
+ }
+
+ cmd_finalize(cmd, rsc);
+}
+
+static void
+lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
+{
+ if ((data == NULL) || (data->userdata == NULL)) {
+ crm_err("Ignoring fence action result: "
+ "Invalid callback arguments (bug?)");
+ } else {
+ stonith_action_complete((lrmd_cmd_t *) data->userdata,
+ stonith__exit_status(data),
+ stonith__execution_status(data),
+ stonith__exit_reason(data));
+ }
+}
+
+void
+stonith_connection_failed(void)
+{
+ GHashTableIter iter;
+ lrmd_rsc_t *rsc = NULL;
+
+ crm_warn("Connection to fencer lost (any pending operations for "
+ "fence devices will be considered failed)");
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
+ if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_none)) {
+ continue;
+ }
+
+ /* If we registered this fence device, we don't know whether the
+ * fencer still has the registration or not. Cause future probes to
+ * return an error until the resource is stopped or started
+ * successfully. This is especially important if the controller also
+ * went away (possibly due to a cluster layer restart) and won't
+ * receive our client notification of any monitors finalized below.
+ */
+ if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+
+ // Consider any active, pending, or recurring operations as failed
+
+ for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
+ lrmd_cmd_t *cmd = op->data;
+
+ /* This won't free a recurring op but instead restart its timer.
+ * If cmd is rsc->active, this will set rsc->active to NULL, so we
+ * don't have to worry about finalizing it a second time below.
+ */
+ stonith_action_complete(cmd,
+ CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+
+ if (rsc->active != NULL) {
+ rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
+ }
+ while (rsc->pending_ops != NULL) {
+ // This will free the op and remove it from rsc->pending_ops
+ stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
+ CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Execute a stonith resource "start" action
+ *
+ * Start a stonith resource by registering it with the fencer.
+ * (Stonith agents don't have a start command.)
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to start
+ * \param[in] cmd Start command to execute
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static int
+execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
+ const lrmd_cmd_t *cmd)
+{
+ char *key = NULL;
+ char *value = NULL;
+ stonith_key_value_t *device_params = NULL;
+ int rc = pcmk_ok;
+
+ // Convert command parameters to stonith API key/values
+ if (cmd->params) {
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, cmd->params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ device_params = stonith_key_value_add(device_params, key, value);
+ }
+ }
+
+ /* The fencer will automatically register devices via CIB notifications
+ * when the CIB changes, but to avoid a possible race condition between
+ * the fencer receiving the notification and the executor requesting that
+ * resource, the executor registers the device as well. The fencer knows how
+ * to handle duplicate registrations.
+ */
+ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
+ cmd->rsc_id, rsc->provider,
+ rsc->type, device_params);
+
+ stonith_key_value_freeall(device_params, 1, 1);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Execute a stonith resource "stop" action
+ *
+ * Stop a stonith resource by unregistering it with the fencer.
+ * (Stonith agents don't have a stop command.)
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to stop
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static inline int
+execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
+{
+ /* @TODO Failure would indicate a problem communicating with fencer;
+ * perhaps we should try reconnecting and retrying a few times?
+ */
+ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
+ rsc->rsc_id);
+}
+
+/*!
+ * \internal
+ * \brief Initiate a stonith resource agent recurring "monitor" action
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in,out] rsc Stonith resource to monitor
+ * \param[in] cmd Monitor command being executed
+ *
+ * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
+ */
+static inline int
+execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
+ cmd->timeout / 1000);
+
+ rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
+ "lrmd_stonith_callback",
+ lrmd_stonith_callback);
+ if (rc == TRUE) {
+ rsc->active = cmd;
+ rc = pcmk_ok;
+ } else {
+ rc = -pcmk_err_generic;
+ }
+ return rc;
+}
+
+static void
+execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ int rc = 0;
+ bool do_monitor = FALSE;
+
+ stonith_t *stonith_api = get_stonith_connection();
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)
+ && (cmd->interval_ms == 0)) {
+ // Probes don't require a fencer connection
+ stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
+ rsc->fence_probe_result.execution_status,
+ rsc->fence_probe_result.exit_reason);
+ return;
+
+ } else if (stonith_api == NULL) {
+ stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_NOT_CONNECTED,
+ "No connection to fencer");
+ return;
+
+ } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ rc = execd_stonith_start(stonith_api, rsc, cmd);
+ if (rc == pcmk_ok) {
+ do_monitor = TRUE;
+ }
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ rc = execd_stonith_stop(stonith_api, rsc);
+
+ } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ do_monitor = TRUE;
+
+ } else {
+ stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
+ PCMK_EXEC_ERROR,
+ "Invalid fence device action (bug?)");
+ return;
+ }
+
+ if (do_monitor) {
+ rc = execd_stonith_monitor(stonith_api, rsc, cmd);
+ if (rc == pcmk_ok) {
+ // Don't clean up yet, we will find out result of the monitor later
+ return;
+ }
+ }
+
+ stonith_action_complete(cmd,
+ ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+ stonith__legacy2status(rc),
+ ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
+}
+
+static void
+execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ svc_action_t *action = NULL;
+ GHashTable *params_copy = NULL;
+
+ CRM_ASSERT(rsc);
+ CRM_ASSERT(cmd);
+
+ crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
+ rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
+
+#if SUPPORT_NAGIOS
+ /* Recurring operations are cancelled anyway for a stop operation */
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
+ && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+
+ cmd->result.exit_status = PCMK_OCF_OK;
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+#endif
+
+ params_copy = pcmk__str_table_dup(cmd->params);
+
+ action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
+ rsc->type,
+ normalize_action_name(rsc, cmd->action),
+ cmd->interval_ms, cmd->timeout,
+ params_copy, cmd->service_flags);
+
+ if (action == NULL) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, strerror(ENOMEM));
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+
+ if (action->rc != PCMK_OCF_UNKNOWN) {
+ pcmk__set_result(&(cmd->result), action->rc, action->status,
+ services__exit_reason(action));
+ services_action_free(action);
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+
+ action->cb_data = cmd;
+
+ if (services_action_async(action, action_complete)) {
+ /* The services library has taken responsibility for the action. It
+ * could be pending, blocked, or merged into a duplicate recurring
+ * action, in which case the action callback (action_complete())
+ * will be called when the action completes, otherwise the callback has
+ * already been called.
+ *
+ * action_complete() calls cmd_finalize() which can free cmd, so cmd
+ * cannot be used here.
+ */
+ } else {
+ /* This is a recurring action that is not being cancelled and could not
+ * be initiated. It has been rescheduled, and the action callback
+ * (action_complete()) has been called, which in this case has already
+ * called cmd_finalize(), which in this case should only reset (not
+ * free) cmd.
+ */
+
+ pcmk__set_result(&(cmd->result), action->rc, action->status,
+ services__exit_reason(action));
+ services_action_free(action);
+ }
+}
+
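+/*!
+ * \internal
+ * \brief Dispatch a resource's next pending operation (mainloop trigger)
+ *
+ * Each resource executes at most one operation at a time. This trigger
+ * handler takes the first pending operation (waiting if its start delay is
+ * still in effect) and hands it to the stonith or services code path.
+ *
+ * \param[in,out] user_data  Resource to dispatch for
+ *
+ * \return Always TRUE
+ */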
+static gboolean
+execute_resource_action(gpointer user_data)
+{
+ lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
+ lrmd_cmd_t *cmd = NULL;
+
+ CRM_CHECK(rsc != NULL, return FALSE);
+
+ if (rsc->active) {
+ crm_trace("%s is still active", rsc->rsc_id);
+ return TRUE;
+ }
+
+ if (rsc->pending_ops) {
+ GList *first = rsc->pending_ops;
+
+ cmd = first->data;
+ if (cmd->delay_id) {
+ crm_trace("Command %s %s was asked to run too early, waiting for "
+ "start_delay timeout of %dms",
+ cmd->rsc_id, cmd->action, cmd->start_delay);
+ return TRUE;
+ }
+ rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
+ g_list_free_1(first);
+
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_run), &(cmd->t_first_run));
+#endif
+ cmd->epoch_last_run = time(NULL);
+ }
+
+ if (!cmd) {
+ crm_trace("Nothing further to do for %s", rsc->rsc_id);
+ return TRUE;
+ }
+
+ rsc->active = cmd; /* only one operation at a time per resource */
+ if (cmd->interval_ms) {
+ rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
+ }
+
+ log_execute(cmd);
+
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ execute_stonith_action(rsc, cmd);
+ } else {
+ execute_nonstonith_action(rsc, cmd);
+ }
+
+ return TRUE;
+}
+
+void
+free_rsc(gpointer data)
+{
+ GList *gIter = NULL;
+ lrmd_rsc_t *rsc = data;
+ int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei);
+
+ gIter = rsc->pending_ops;
+ while (gIter != NULL) {
+ GList *next = gIter->next;
+ lrmd_cmd_t *cmd = gIter->data;
+
+ /* command was never executed */
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ cmd_finalize(cmd, NULL);
+
+ gIter = next;
+ }
+ /* frees list, but not list elements. */
+ g_list_free(rsc->pending_ops);
+
+ gIter = rsc->recurring_ops;
+ while (gIter != NULL) {
+ GList *next = gIter->next;
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (is_stonith) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ /* If a stonith command is in-flight, just mark it as cancelled;
+ * it is not safe to finalize/free the cmd until the stonith api
+ * says it has either completed or timed out.
+ */
+ if (rsc->active != cmd) {
+ cmd_finalize(cmd, NULL);
+ }
+ } else {
+ /* This command is already handed off to service library,
+ * let service library cancel it and tell us via the callback
+ * when it is cancelled. The rsc can be safely destroyed
+ * even if we are waiting for the cancel result */
+ services_action_cancel(rsc->rsc_id,
+ normalize_action_name(rsc, cmd->action),
+ cmd->interval_ms);
+ }
+
+ gIter = next;
+ }
+ /* frees list, but not list elements. */
+ g_list_free(rsc->recurring_ops);
+
+ free(rsc->rsc_id);
+ free(rsc->class);
+ free(rsc->provider);
+ free(rsc->type);
+ mainloop_destroy_trigger(rsc->work);
+
+ free(rsc);
+}
+
+static int
+process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
+ xmlNode **reply)
+{
+ int rc = pcmk_ok;
+ time_t now = time(NULL);
+ const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
+
+ if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
+ crm_err("Cluster API version must be greater than or equal to %s, not %s",
+ LRMD_MIN_PROTOCOL_VERSION, protocol_version);
+ rc = -EPROTO;
+ }
+
+ if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) {
+#ifdef PCMK__COMPILE_REMOTE
+ if ((client->remote != NULL)
+ && pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+
+ // This is a remote connection from a cluster node's controller
+ ipc_proxy_add_provider(client);
+ } else {
+ rc = -EACCES;
+ }
+#else
+ rc = -EPROTONOSUPPORT;
+#endif
+ }
+
+ *reply = create_lrmd_reply(__func__, rc, call_id);
+ crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
+ crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
+ crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
+
+ return rc;
+}
+
+static int
+process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ int rc = pcmk_ok;
+ lrmd_rsc_t *rsc = build_rsc_from_xml(request);
+ lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
+
+ if ((dup != NULL)
+ && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei)
+ && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei)
+ && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
+
+ crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
+ free_rsc(rsc);
+ return rc;
+ }
+
+ g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
+ crm_info("Cached agent information for '%s'", rsc->rsc_id);
+ return rc;
+}
+
+static xmlNode *
+process_lrmd_get_rsc_info(xmlNode *request, int call_id)
+{
+ int rc = pcmk_ok;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ xmlNode *reply = NULL;
+ lrmd_rsc_t *rsc = NULL;
+
+ if (rsc_id == NULL) {
+ rc = -ENODEV;
+ } else {
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Agent information for '%s' not in cache", rsc_id);
+ rc = -ENODEV;
+ }
+ }
+
+ reply = create_lrmd_reply(__func__, rc, call_id);
+ if (rsc) {
+ crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
+ crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
+ crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
+ crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
+ }
+ return reply;
+}
+
+static int
+process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
+ xmlNode *request)
+{
+ int rc = pcmk_ok;
+ lrmd_rsc_t *rsc = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+
+ if (!rsc_id) {
+ return -ENODEV;
+ }
+
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Ignoring unregistration of resource '%s', which is not registered",
+ rsc_id);
+ return pcmk_ok;
+ }
+
+ if (rsc->active) {
+ /* let the caller know there are still active ops on this rsc to watch for */
+ crm_trace("Operation (%p) still in progress for unregistered resource %s",
+ rsc->active, rsc_id);
+ rc = -EINPROGRESS;
+ }
+
+ g_hash_table_remove(rsc_list, rsc_id);
+
+ return rc;
+}
+
+static int
+process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ lrmd_rsc_t *rsc = NULL;
+ lrmd_cmd_t *cmd = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ int call_id;
+
+ if (!rsc_id) {
+ return -EINVAL;
+ }
+ if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
+ crm_info("Resource '%s' not found (%d active resources)",
+ rsc_id, g_hash_table_size(rsc_list));
+ return -ENODEV;
+ }
+
+ cmd = create_lrmd_cmd(request, client);
+ call_id = cmd->call_id;
+
+ /* Don't reference cmd after handing it off to be scheduled.
+ * The cmd could get merged and freed. */
+ schedule_lrmd_cmd(rsc, cmd);
+
+ return call_id;
+}
+
+static int
+cancel_op(const char *rsc_id, const char *action, guint interval_ms)
+{
+ GList *gIter = NULL;
+ lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
+
+ /* How to cancel an action:
+ * 1. Check the pending ops list. If the action hasn't yet been handed
+ * off to the service library or the stonith recurring list, removing
+ * it there will stop it.
+ * 2. If it isn't in the pending ops list, it's a recurring op in either
+ * the stonith recurring list or the service library's recurring
+ * list. Stop it there.
+ * 3. If it isn't found in any list, the operation either already
+ * executed (and was not recurring) or never existed.
+ */
+ if (!rsc) {
+ return -ENODEV;
+ }
+
+ for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (action_matches(cmd, action, interval_ms)) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ cmd_finalize(cmd, rsc);
+ return pcmk_ok;
+ }
+ }
+
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ /* The service library does not handle stonith operations.
+ * We have to handle recurring stonith operations ourselves. */
+ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (action_matches(cmd, action, interval_ms)) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ if (rsc->active != cmd) {
+ cmd_finalize(cmd, rsc);
+ }
+ return pcmk_ok;
+ }
+ }
+ } else if (services_action_cancel(rsc_id,
+ normalize_action_name(rsc, action),
+ interval_ms) == TRUE) {
+ /* The service library will tell the action_complete callback function
+ * this action was cancelled, which will destroy the cmd and remove
+ * it from the recurring_op list. Do not do that in this function
+ * if the service library says it cancelled it. */
+ return pcmk_ok;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static void
+cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
+{
+ GList *cmd_list = NULL;
+ GList *cmd_iter = NULL;
+
+ /* Note that g_list_concat() is called on a copy of each list. This
+ * matters because cancel_op() may modify the recurring_ops and
+ * pending_ops lists while cmd_list is being iterated; without the
+ * copies, that could invalidate our iteration. */
+ if (rsc->recurring_ops) {
+ cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
+ }
+ if (rsc->pending_ops) {
+ cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
+ }
+ if (!cmd_list) {
+ return;
+ }
+
+ for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
+ lrmd_cmd_t *cmd = cmd_iter->data;
+
+ if (cmd->interval_ms == 0) {
+ continue;
+ }
+
+ if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
+ continue;
+ }
+
+ cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
+ }
+ /* frees only the copied list data, not the cmds */
+ g_list_free(cmd_list);
+}
+
+static int
+process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
+ guint interval_ms = 0;
+
+ crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
+
+ if (!rsc_id || !action) {
+ return -EINVAL;
+ }
+
+ return cancel_op(rsc_id, action, interval_ms);
+}
+
+static void
+add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
+{
+ xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
+
+ crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
+ for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
+ lrmd_cmd_t *cmd = item->data;
+ xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
+
+ crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
+ (cmd->real_action? cmd->real_action : cmd->action));
+ crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
+ crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
+ }
+}
+
+static xmlNode *
+process_lrmd_get_recurring(xmlNode *request, int call_id)
+{
+ int rc = pcmk_ok;
+ const char *rsc_id = NULL;
+ lrmd_rsc_t *rsc = NULL;
+ xmlNode *reply = NULL;
+ xmlNode *rsc_xml = NULL;
+
+ // Resource ID is optional
+ rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
+ if (rsc_xml) {
+ rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
+ }
+ if (rsc_xml) {
+ rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ }
+
+ // If resource ID is specified, resource must exist
+ if (rsc_id != NULL) {
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Resource '%s' not found (%d active resources)",
+ rsc_id, g_hash_table_size(rsc_list));
+ rc = -ENODEV;
+ }
+ }
+
+ reply = create_lrmd_reply(__func__, rc, call_id);
+
+ // If resource ID is not specified, check all resources
+ if (rsc_id == NULL) {
+ GHashTableIter iter;
+ char *key = NULL;
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &rsc)) {
+ add_recurring_op_xml(reply, rsc);
+ }
+ } else if (rsc) {
+ add_recurring_op_xml(reply, rsc);
+ }
+ return reply;
+}
+
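+/*!
+ * \internal
+ * \brief Dispatch an executor IPC request to the appropriate handler
+ *
+ * Privileged operations are rejected with -EACCES for unprivileged clients.
+ * A reply and/or a generic notification is sent as required by the
+ * particular operation.
+ *
+ * \param[in,out] client  Client that sent the request
+ * \param[in]     id      IPC message ID
+ * \param[in,out] request Request XML
+ */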
+void
+process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ int rc = pcmk_ok;
+ int call_id = 0;
+ const char *op = crm_element_value(request, F_LRMD_OPERATION);
+ int do_reply = 0;
+ int do_notify = 0;
+ xmlNode *reply = NULL;
+
+ /* Certain IPC commands may be done only by privileged users (i.e. root or
+ * hacluster), because they would otherwise provide a means of bypassing
+ * ACLs.
+ */
+ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
+
+ crm_trace("Processing %s operation from %s", op, client->id);
+ crm_element_value_int(request, F_LRMD_CALLID, &call_id);
+
+ if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
+#ifdef PCMK__COMPILE_REMOTE
+ if (allowed) {
+ ipc_proxy_forward_client(client, request);
+ } else {
+ rc = -EACCES;
+ }
+#else
+ rc = -EPROTONOSUPPORT;
+#endif
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
+ rc = process_lrmd_signon(client, request, call_id, &reply);
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_register(client, id, request);
+ do_notify = 1;
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
+ if (allowed) {
+ reply = process_lrmd_get_rsc_info(request, call_id);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_unregister(client, id, request);
+ /* Don't notify anyone about failed unregistrations */
+ if (rc == pcmk_ok || rc == -EINPROGRESS) {
+ do_notify = 1;
+ }
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_exec(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_cancel(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
+ do_notify = 1;
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
+ if (allowed) {
+ xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
+
+ CRM_LOG_ASSERT(data != NULL);
+ pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
+ } else {
+ rc = -EACCES;
+ }
+ } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_alert_exec(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
+ if (allowed) {
+ reply = process_lrmd_get_recurring(request, call_id);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else {
+ rc = -EOPNOTSUPP;
+ do_reply = 1;
+ crm_err("Unknown IPC request '%s' from client %s",
+ op, pcmk__client_name(client));
+ }
+
+ if (rc == -EACCES) {
+ crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
+ op, pcmk__client_name(client));
+ }
+
+ crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
+ op, client->id, rc, do_reply, do_notify);
+
+ if (do_reply) {
+ int send_rc = pcmk_rc_ok;
+
+ if (reply == NULL) {
+ reply = create_lrmd_reply(__func__, rc, call_id);
+ }
+ send_rc = lrmd_server_send_reply(client, id, reply);
+ free_xml(reply);
+ if (send_rc != pcmk_rc_ok) {
+ crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
+ pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
+ }
+ }
+
+ if (do_notify) {
+ send_generic_notify(rc, request);
+ }
+}
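+
+/* Illustrative sketch (not part of the original source): a request routed by
+ * process_lrmd_message() is an XML message whose operation attribute selects
+ * one of the branches above, conceptually:
+ *
+ *   <request F_LRMD_OPERATION="LRMD_OP_RSC_EXEC" F_LRMD_CALLID="42" .../>
+ *
+ * The element and attribute names shown here are placeholders; the canonical
+ * strings are defined by the F_LRMD_* and LRMD_OP_* macros.
+ */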
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
new file mode 100644
index 0000000..83a8cd7
--- /dev/null
+++ b/daemons/execd/pacemaker-execd.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <signal.h>
+#include <sys/types.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/services.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/remote_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include "pacemaker-execd.h"
+
+#ifdef PCMK__COMPILE_REMOTE
+# define EXECD_TYPE "remote"
+# define EXECD_NAME "pacemaker-remoted"
+# define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes"
+#else
+# define EXECD_TYPE "local"
+# define EXECD_NAME "pacemaker-execd"
+# define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes"
+#endif
+
+static GMainLoop *mainloop = NULL;
+static qb_ipcs_service_t *ipcs = NULL;
+static stonith_t *stonith_api = NULL;
+int lrmd_call_id = 0;
+time_t start_time;
+
+static struct {
+ gchar **log_files;
+#ifdef PCMK__COMPILE_REMOTE
+ gchar *port;
+#endif // PCMK__COMPILE_REMOTE
+} options;
+
+#ifdef PCMK__COMPILE_REMOTE
+/* whether shutdown request has been sent */
+static gboolean shutting_down = FALSE;
+
+/* timer for waiting for acknowledgment of shutdown request */
+static guint shutdown_ack_timer = 0;
+
+static gboolean lrmd_exit(gpointer data);
+#endif
+
+static void
+stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e)
+{
+ stonith_api->state = stonith_disconnected;
+ stonith_connection_failed();
+}
+
+stonith_t *
+get_stonith_connection(void)
+{
+ if (stonith_api && stonith_api->state == stonith_disconnected) {
+ stonith_api_delete(stonith_api);
+ stonith_api = NULL;
+ }
+
+ if (stonith_api == NULL) {
+ int rc = pcmk_ok;
+
+ stonith_api = stonith_api_new();
+ if (stonith_api == NULL) {
+ crm_err("Could not connect to fencer: API memory allocation failed");
+ return NULL;
+ }
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 10 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ stonith_api_delete(stonith_api);
+ stonith_api = NULL;
+ } else {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ stonith_connection_destroy_cb);
+ }
+ }
+ return stonith_api;
+}
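+
+/* Usage sketch (illustrative, not from the original source): callers borrow
+ * the connection and must not free it; after a disconnect, the next call
+ * transparently replaces the dead connection.
+ *
+ *   stonith_t *st = get_stonith_connection();
+ *
+ *   if (st != NULL) {
+ *       // issue fencing commands via st->cmds->...
+ *   }
+ */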
+
+static int32_t
+lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ crm_trace("Connection %p", c);
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+static void
+lrmd_ipc_created(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *new_client = pcmk__find_client(c);
+
+ crm_trace("Connection %p", c);
+ CRM_ASSERT(new_client != NULL);
+    /* Now that the connection is officially established, alert
+     * the other clients that a new connection exists. */
+
+ notify_of_new_client(new_client);
+}
+
+static int32_t
+lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+    pcmk__client_t *client = pcmk__find_client(c);
+    xmlNode *request = NULL;
+
+    // Validate the client before dereferencing it to parse the message
+    CRM_CHECK(client != NULL, crm_err("Invalid client");
+              return FALSE);
+    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
+              return FALSE);
+
+    request = pcmk__client_data2xml(client, data, &id, &flags);
+
+    CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client);
+              free_xml(request); return FALSE);
+
+    if (!request) {
+        return 0;
+    }
+
+ if (!client->name) {
+ const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
+
+ if (value == NULL) {
+ client->name = pcmk__itoa(pcmk__client_pid(c));
+ } else {
+ client->name = strdup(value);
+ }
+ }
+
+ lrmd_call_id++;
+ if (lrmd_call_id < 1) {
+ lrmd_call_id = 1;
+ }
+
+ crm_xml_add(request, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
+ crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
+
+ process_lrmd_message(client, id, request);
+
+ free_xml(request);
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Free a client connection, and exit if appropriate
+ *
+ * \param[in,out] client Client connection to free
+ */
+void
+lrmd_client_destroy(pcmk__client_t *client)
+{
+ pcmk__free_client(client);
+
+#ifdef PCMK__COMPILE_REMOTE
+ /* If we were waiting to shut down, we can now safely do so
+ * if there are no more proxied IPC providers
+ */
+ if (shutting_down && (ipc_proxy_get_provider() == NULL)) {
+ lrmd_exit(NULL);
+ }
+#endif
+}
+
+static int32_t
+lrmd_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ crm_trace("Connection %p", c);
+ client_disconnect_cleanup(client->id);
+#ifdef PCMK__COMPILE_REMOTE
+ ipc_proxy_remove_provider(client);
+#endif
+ lrmd_client_destroy(client);
+ return 0;
+}
+
+static void
+lrmd_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ lrmd_ipc_closed(c);
+ crm_trace("Connection %p", c);
+}
+
+static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
+ .connection_accept = lrmd_ipc_accept,
+ .connection_created = lrmd_ipc_created,
+ .msg_process = lrmd_ipc_dispatch,
+ .connection_closed = lrmd_ipc_closed,
+ .connection_destroyed = lrmd_ipc_destroy
+};
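+
+/* For orientation (an assumption about libqb's usual callback sequence, not
+ * stated in this source): these handlers run roughly in the order
+ * connection_accept -> connection_created -> msg_process (repeatedly)
+ * -> connection_closed -> connection_destroyed.
+ */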
+
+// \return Standard Pacemaker return code
+int
+lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply)
+{
+ crm_trace("Sending reply (%d) to client (%s)", id, client->id);
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_ipc:
+ return pcmk__ipc_send_xml(client, id, reply, FALSE);
+#ifdef PCMK__COMPILE_REMOTE
+ case pcmk__client_tls:
+ return lrmd__remote_send_xml(client->remote, reply, id, "reply");
+#endif
+ default:
+ crm_err("Could not send reply: unknown type for client %s "
+ CRM_XS " flags=%#llx",
+ pcmk__client_name(client), client->flags);
+ }
+ return ENOTCONN;
+}
+
+// \return Standard Pacemaker return code
+int
+lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg)
+{
+ crm_trace("Sending notification to client (%s)", client->id);
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_ipc:
+ if (client->ipcs == NULL) {
+ crm_trace("Could not notify local client: disconnected");
+ return ENOTCONN;
+ }
+ return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event);
+#ifdef PCMK__COMPILE_REMOTE
+ case pcmk__client_tls:
+ if (client->remote == NULL) {
+ crm_trace("Could not notify remote client: disconnected");
+ return ENOTCONN;
+ } else {
+ return lrmd__remote_send_xml(client->remote, msg, 0, "notify");
+ }
+#endif
+ default:
+ crm_err("Could not notify client %s with unknown transport "
+ CRM_XS " flags=%#llx",
+ pcmk__client_name(client), client->flags);
+ }
+ return ENOTCONN;
+}
+
+/*!
+ * \internal
+ * \brief Clean up and exit immediately
+ *
+ * \param[in] data Ignored
+ *
+ * \return Doesn't return
+ * \note This can be used as a timer callback.
+ */
+static gboolean
+lrmd_exit(gpointer data)
+{
+ crm_info("Terminating with %d clients", pcmk__ipc_client_count());
+ if (stonith_api) {
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
+ stonith_api->cmds->disconnect(stonith_api);
+ stonith_api_delete(stonith_api);
+ }
+ if (ipcs) {
+ mainloop_del_ipc_server(ipcs);
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ execd_stop_tls_server();
+ ipc_proxy_cleanup();
+#endif
+
+ pcmk__client_cleanup();
+ g_hash_table_destroy(rsc_list);
+
+ if (mainloop) {
+ lrmd_drain_alerts(mainloop);
+ }
+
+ crm_exit(CRM_EX_OK);
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Request cluster shutdown if appropriate, otherwise exit immediately
+ *
+ * \param[in] nsig Signal that caused invocation (ignored)
+ */
+static void
+lrmd_shutdown(int nsig)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
+
+ /* If there are active proxied IPC providers, then we may be running
+ * resources, so notify the cluster that we wish to shut down.
+ */
+ if (ipc_proxy) {
+ if (shutting_down) {
+ crm_notice("Waiting for cluster to stop resources before exiting");
+ return;
+ }
+
+ crm_info("Sending shutdown request to cluster");
+ if (ipc_proxy_shutdown_req(ipc_proxy) < 0) {
+ crm_crit("Shutdown request failed, exiting immediately");
+
+ } else {
+ /* We requested a shutdown. Now, we need to wait for an
+ * acknowledgement from the proxy host (which ensures the proxy host
+ * supports shutdown requests), then wait for all proxy hosts to
+ * disconnect (which ensures that all resources have been stopped).
+ */
+ shutting_down = TRUE;
+
+ /* Stop accepting new proxy connections */
+ execd_stop_tls_server();
+
+ /* Older controller versions will never acknowledge our request, so
+ * set a fairly short timeout to exit quickly in that case. If we
+ * get the ack, we'll defuse this timer.
+ */
+ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);
+
+ /* Currently, we let the OS kill us if the clients don't disconnect
+ * in a reasonable time. We could instead set a long timer here
+ * (shorter than what the OS is likely to use) and exit immediately
+ * if it pops.
+ */
+ return;
+ }
+ }
+#endif
+ lrmd_exit(NULL);
+}
+
+/*!
+ * \internal
+ * \brief Defuse short exit timer if shutting down
+ */
+void
+handle_shutdown_ack(void)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ if (shutting_down) {
+ crm_info("Received shutdown ack");
+ if (shutdown_ack_timer > 0) {
+ g_source_remove(shutdown_ack_timer);
+ shutdown_ack_timer = 0;
+ }
+ return;
+ }
+#endif
+ crm_debug("Ignoring unexpected shutdown ack");
+}
+
+/*!
+ * \internal
+ * \brief Make short exit timer fire immediately
+ */
+void
+handle_shutdown_nack(void)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ if (shutting_down) {
+ crm_info("Received shutdown nack");
+ if (shutdown_ack_timer > 0) {
+ g_source_remove(shutdown_ack_timer);
+ shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL);
+ }
+ return;
+ }
+#endif
+ crm_debug("Ignoring unexpected shutdown nack");
+}
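+
+/* Shutdown handshake at a glance (a summary of the logic above, for
+ * illustration only):
+ *
+ *   SIGTERM -> lrmd_shutdown() -> ipc_proxy_shutdown_req() to the cluster
+ *     ack  -> handle_shutdown_ack():  cancel the 20-second fallback timer,
+ *                                     then wait for providers to disconnect
+ *     nack -> handle_shutdown_nack(): re-arm the timer to fire immediately,
+ *                                     i.e. exit right away
+ */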
+
+static GOptionEntry entries[] = {
+ { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
+ &options.log_files, "Send logs to the additional named logfile", NULL },
+
+#ifdef PCMK__COMPILE_REMOTE
+
+ { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port,
+ "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL },
+#endif // PCMK__COMPILE_REMOTE
+
+ { NULL }
+};
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv, char **envp)
+{
+ int rc = pcmk_rc_ok;
+ crm_exit_t exit_code = CRM_EX_OK;
+
+ const char *option = NULL;
+
+ pcmk__output_t *out = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+#ifdef PCMK__COMPILE_REMOTE
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "lp");
+#else
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
+#endif // PCMK__COMPILE_REMOTE
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+#ifdef PCMK__COMPILE_REMOTE
+ // If necessary, create PID 1 now before any file descriptors are opened
+ remoted_spawn_pidone(argc, argv, envp);
+#endif
+
+ crm_log_preinit(EXECD_NAME, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ // Open additional log files
+ if (options.log_files != NULL) {
+ for (gchar **fname = options.log_files; *fname != NULL; fname++) {
+ rc = pcmk__add_logfile(*fname);
+
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "Logging to %s is disabled: %s",
+ *fname, pcmk_rc_str(rc));
+ }
+ }
+ }
+
+ pcmk__cli_init_logging(EXECD_NAME, args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+
+ option = pcmk__env_option(PCMK__ENV_LOGFACILITY);
+ if (!pcmk__str_eq(option, PCMK__VALUE_NONE,
+ pcmk__str_casei|pcmk__str_null_matches)
+ && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) {
+ setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */
+ }
+
+ option = pcmk__env_option(PCMK__ENV_LOGFILE);
+ if (!pcmk__str_eq(option, PCMK__VALUE_NONE,
+ pcmk__str_casei|pcmk__str_null_matches)) {
+ setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */
+
+ if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) {
+ setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */
+ }
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ if (options.port != NULL) {
+ setenv("PCMK_remote_port", options.port, 1);
+ }
+#endif // PCMK__COMPILE_REMOTE
+
+ start_time = time(NULL);
+
+ crm_notice("Starting Pacemaker " EXECD_TYPE " executor");
+
+ /* The presence of this variable allegedly controls whether child
+     * processes like httpd will try to use Systemd's sd_notify
+ * API
+ */
+ unsetenv("NOTIFY_SOCKET");
+
+ {
+ // Temporary directory for resource agent use (leave owned by root)
+ int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755);
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not create resource agent temporary directory "
+ CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc));
+ }
+ }
+
+ rsc_list = pcmk__strkey_table(NULL, free_rsc);
+ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
+ if (ipcs == NULL) {
+ crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ if (lrmd_init_remote_tls_server() < 0) {
+ crm_err("Failed to create TLS listener: shutting down and staying down");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+ ipc_proxy_init();
+#endif
+
+ mainloop_add_signal(SIGTERM, lrmd_shutdown);
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
+ crm_notice("OCF resource agent search path is %s", OCF_RA_PATH);
+ g_main_loop_run(mainloop);
+
+ /* should never get here */
+ lrmd_exit(NULL);
+
+done:
+ g_strfreev(options.log_files);
+#ifdef PCMK__COMPILE_REMOTE
+ g_free(options.port);
+#endif // PCMK__COMPILE_REMOTE
+
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h
new file mode 100644
index 0000000..9c1d173
--- /dev/null
+++ b/daemons/execd/pacemaker-execd.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PACEMAKER_EXECD__H
+# define PACEMAKER_EXECD__H
+
+# include <glib.h>
+# include <crm/common/ipc_internal.h>
+# include <crm/lrmd.h>
+# include <crm/stonith-ng.h>
+
+# ifdef HAVE_GNUTLS_GNUTLS_H
+# include <gnutls/gnutls.h>
+# endif
+
+extern GHashTable *rsc_list;
+extern time_t start_time;
+
+typedef struct lrmd_rsc_s {
+ char *rsc_id;
+ char *class;
+ char *provider;
+ char *type;
+
+ int call_opts;
+
+    /* NEVER dereference this pointer. It exists only as a flag to let us
+     * know when the currently active operation has completed. */
+ void *active;
+
+ /* Operations in this list
+ * have not been executed yet. */
+ GList *pending_ops;
+ /* Operations in this list are recurring operations
+ * that have been handed off from the pending ops list. */
+ GList *recurring_ops;
+
+ /* If this resource is a fence device, probes are handled internally by the
+ * executor, and this value indicates the result that should currently be
+ * returned for probes. It should be one of:
+ * PCMK_EXEC_DONE (to indicate "running"),
+ * PCMK_EXEC_NO_FENCE_DEVICE ("not running"), or
+ * PCMK_EXEC_NOT_CONNECTED ("unknown because fencer connection was lost").
+ */
+ pcmk__action_result_t fence_probe_result;
+
+ crm_trigger_t *work;
+} lrmd_rsc_t;
+
+# ifdef HAVE_GNUTLS_GNUTLS_H
+// in remoted_tls.c
+int lrmd_init_remote_tls_server(void);
+void execd_stop_tls_server(void);
+# endif
+
+int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply);
+
+int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg);
+
+void notify_of_new_client(pcmk__client_t *new_client);
+
+void process_lrmd_message(pcmk__client_t *client, uint32_t id,
+ xmlNode *request);
+
+void free_rsc(gpointer data);
+
+void handle_shutdown_ack(void);
+
+void handle_shutdown_nack(void);
+
+void lrmd_client_destroy(pcmk__client_t *client);
+
+void client_disconnect_cleanup(const char *client_id);
+
+/*!
+ * \brief Get the fencer connection (do not free it; it is cleaned up
+ * after the main loop exits)
+ */
+stonith_t *get_stonith_connection(void);
+
+/*!
+ * \brief Callback notifying the executor that the current stonith
+ * connection has gone away, allowing any pending stonith commands
+ * to be timed out
+ */
+void stonith_connection_failed(void);
+
+#ifdef PCMK__COMPILE_REMOTE
+void ipc_proxy_init(void);
+void ipc_proxy_cleanup(void);
+void ipc_proxy_add_provider(pcmk__client_t *client);
+void ipc_proxy_remove_provider(pcmk__client_t *client);
+void ipc_proxy_forward_client(pcmk__client_t *client, xmlNode *xml);
+pcmk__client_t *ipc_proxy_get_provider(void);
+int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy);
+void remoted_spawn_pidone(int argc, char **argv, char **envp);
+#endif
+
+int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id,
+ xmlNode *request);
+void lrmd_drain_alerts(GMainLoop *mloop);
+
+#endif // PACEMAKER_EXECD__H
diff --git a/daemons/execd/pacemaker-remoted.8.inc b/daemons/execd/pacemaker-remoted.8.inc
new file mode 100644
index 0000000..bc86acc
--- /dev/null
+++ b/daemons/execd/pacemaker-remoted.8.inc
@@ -0,0 +1,5 @@
+[synopsis]
+pacemaker-remoted [options]
+
+/for Pacemaker Remote nodes/
+.SH OPTIONS
diff --git a/daemons/execd/pacemaker_remote.in b/daemons/execd/pacemaker_remote.in
new file mode 100644
index 0000000..2096c5f
--- /dev/null
+++ b/daemons/execd/pacemaker_remote.in
@@ -0,0 +1,176 @@
+#!@BASH_PATH@
+
+# Authors:
+# Andrew Beekhof <abeekhof@redhat.com>
+#
+# License: Revised BSD
+
+# chkconfig: - 99 01
+# description: Pacemaker Cluster Manager
+# processname: pacemaker-remoted
+#
+### BEGIN INIT INFO
+# Provides: pacemaker_remote
+# Required-Start: $network $remote_fs
+# Should-Start: $syslog
+# Required-Stop: $network $remote_fs
+# Default-Start:
+# Default-Stop:
+# Short-Description: Manage the executor for Pacemaker Remote nodes
+# Description: Manage the executor for Pacemaker Remote nodes
+### END INIT INFO
+
+desc="Pacemaker Remote Executor"
+prog="pacemaker-remoted"
+
+# set secure PATH
+PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@"
+
+checkrc() {
+ if [ $? = 0 ]; then
+ success
+ else
+ failure
+ fi
+}
+
+success()
+{
+ echo -ne "[ OK ]\r"
+}
+
+failure()
+{
+ echo -ne "[FAILED]\r"
+}
+
+status()
+{
+ pid=$(pidof $1 2>/dev/null)
+ local rtrn=$?
+ if [ $rtrn -ne 0 ]; then
+ echo "$1 is stopped"
+ if [ -f "@localstatedir@/run/$prog.pid" ]; then
+ rtrn=1
+ else
+ rtrn=3
+ fi
+ else
+ echo "$1 (pid $pid) is running..."
+ fi
+ return $rtrn
+}
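+
+# Illustrative usage: "service pacemaker_remote status" prints one of
+#   pacemaker-remoted (pid 1234) is running...   -> exit 0
+#   pacemaker-remoted is stopped                 -> exit 1 (stale PID file)
+#                                                   or 3 (cleanly stopped)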
+
+if [ -d @CONFIGDIR@ ]; then
+ [ -f @INITDIR@/functions ] && . @INITDIR@/functions
+set -a
+ [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker
+ [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd
+set +a
+fi
+
+LOCK_DIR="."
+if [ -d "@localstatedir@/lock/subsys" ]; then
+ LOCK_DIR="@localstatedir@/lock/subsys"
+elif [ -d "@localstatedir@/lock" ]; then
+ LOCK_DIR="@localstatedir@/lock"
+fi
+[ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker_remote"
+
+# Check if there is a valid watchdog-device configured in sbd config
+if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then
+ # enhance for unavailable chkconfig - don't touch sbd for now
+ if chkconfig --list sbd_remote_helper 2>/dev/null | grep -q ":on"; then
+ SBD_SERVICE=sbd_remote_helper
+ fi
+fi
+
+start()
+{
+ echo -n "Starting $desc: "
+
+    # Most recent distributions use tmpfs for @localstatedir@/run
+    # so that it does not need to be cleaned up on every boot.
+    # They also assume that init scripts will create
+    # any required subdirectories for proper operation.
+ mkdir -p "@localstatedir@/run"
+
+ if status $prog > /dev/null 2>&1; then
+ success
+ else
+ $prog > /dev/null 2>&1 &
+
+        # Allow the daemon time to start up (or to fail)
+ sleep 5
+
+ if status $prog > /dev/null 2>&1; then
+ touch "$LOCK_FILE"
+ pidof $prog > "@localstatedir@/run/$prog.pid"
+ success
+ else
+ failure
+ rtrn=1
+ fi
+ fi
+ echo
+
+ [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE start
+}
+
+stop()
+{
+ if status $prog > /dev/null 2>&1; then
+ echo -n "Signaling $desc to terminate: "
+ kill -TERM $(pidof $prog) > /dev/null 2>&1
+ success
+ echo
+
+ echo -n "Waiting for $desc to unload:"
+ while status $prog > /dev/null 2>&1; do
+ sleep 1
+ echo -n "."
+ done
+ else
+ echo -n "$desc is already stopped"
+ fi
+
+ rm -f "$LOCK_FILE"
+ rm -f "@localstatedir@/run/$prog.pid"
+ success
+ echo
+
+ [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE stop
+}
+
+rtrn=0
+
+case "$1" in
+start)
+ start
+;;
+restart|reload|force-reload)
+ stop
+ start
+;;
+condrestart|try-restart)
+ if status $prog > /dev/null 2>&1; then
+ stop
+ start
+ rtrn=$?
+ fi
+;;
+status)
+ status $prog
+ rtrn=$?
+;;
+stop)
+ stop
+ rtrn=$?
+;;
+*)
+ echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}"
+ rtrn=2
+;;
+esac
+
+exit $rtrn
diff --git a/daemons/execd/pacemaker_remote.service.in b/daemons/execd/pacemaker_remote.service.in
new file mode 100644
index 0000000..1e48d14
--- /dev/null
+++ b/daemons/execd/pacemaker_remote.service.in
@@ -0,0 +1,52 @@
+[Unit]
+Description=Pacemaker Remote executor daemon
+Documentation=man:pacemaker-remoted
+Documentation=https://clusterlabs.org/pacemaker/doc/
+
+# See main pacemaker unit file for descriptions of why these are needed
+After=network.target
+After=time-sync.target
+After=dbus.service
+Wants=dbus.service
+After=resource-agents-deps.target
+Wants=resource-agents-deps.target
+After=syslog.service
+After=rsyslog.service
+
+[Install]
+Alias=pacemaker-remote.service
+WantedBy=multi-user.target
+
+[Service]
+Type=simple
+KillMode=process
+NotifyAccess=none
+EnvironmentFile=-@CONFIGDIR@/pacemaker
+EnvironmentFile=-@CONFIGDIR@/sbd
+
+# Not actually success, but fatal failure -- this ensures no respawn
+SuccessExitStatus=100
+
+ExecStart=@sbindir@/pacemaker-remoted
+
+# Systemd v227 and above can limit the number of processes spawned by a
+# service. That is a bad idea for an HA cluster resource manager, so disable it
+# by default. The administrator can create a local override if they really want
+# a limit. If your systemd version does not support TasksMax, and you want to
+# get rid of the resulting log warnings, comment out this option.
+TasksMax=infinity
+
+# When connected to the cluster and functioning properly, the service will wait
+# to exit until the cluster notifies it that all resources on the remote node
+# have been stopped. The default of 30min should cover most typical cluster
+# configurations, but it may need an increase to adapt to local conditions
+# (e.g. a large, clustered database could conceivably take longer to stop).
+TimeoutStopSec=30min
+TimeoutStartSec=30s
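+
+# Example local override (hypothetical values), e.g. created with
+# "systemctl edit pacemaker_remote":
+#   [Service]
+#   TimeoutStopSec=60min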
+
+# Restart options include: no, on-success, on-failure, on-abort or always
+Restart=on-failure
+
+# crm_perror() writes directly to stderr, so ignore it here
+# to avoid double-logging with the wrong format
+StandardError=null
diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c
new file mode 100644
index 0000000..4f914eb
--- /dev/null
+++ b/daemons/execd/remoted_pidone.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2017-2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <crm/crm.h>
+#include "pacemaker-execd.h"
+
+static pid_t main_pid = 0;
+
+static void
+sigdone(void)
+{
+ exit(CRM_EX_OK);
+}
+
+static void
+sigreap(void)
+{
+ pid_t pid = 0;
+ int status;
+
+ do {
+ /*
+ * Opinions seem to differ as to what to put here:
+ * -1, any child process
+ * 0, any child process whose process group ID is equal to that of the calling process
+ */
+ pid = waitpid(-1, &status, WNOHANG);
+ if (pid == main_pid) {
+            /* Exit when pacemaker-remoted exits, and use the same return code */
+ if (WIFEXITED(status)) {
+ exit(WEXITSTATUS(status));
+ }
+ exit(CRM_EX_ERROR);
+ }
+ } while (pid > 0);
+}
+
+static struct {
+ int sig;
+ void (*handler)(void);
+} sigmap[] = {
+ { SIGCHLD, sigreap },
+ { SIGINT, sigdone },
+};
+
+/*!
+ * \internal
+ * \brief Check a line of text for a valid environment variable name
+ *
+ * \param[in] line Text to check
+ * \param[out] first First character of valid name if found, NULL otherwise
+ * \param[out] last Last character of valid name if found, NULL otherwise
+ *
+ * \return TRUE if valid name found, FALSE otherwise
+ * \note It's reasonable to impose limitations on environment variable names
+ *       beyond what C or setenv() does: we allow only names that consist of
+ * [a-zA-Z0-9_] characters and do not start with a digit.
+ */
+static bool
+find_env_var_name(char *line, char **first, char **last)
+{
+ // Skip leading whitespace
+ *first = line;
+ while (isspace(**first)) {
+ ++*first;
+ }
+
+ if (isalpha(**first) || (**first == '_')) { // Valid first character
+ *last = *first;
+ while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) {
+ ++*last;
+ }
+ return TRUE;
+ }
+
+ *first = *last = NULL;
+ return FALSE;
+}
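+
+/* Examples (illustrative): for the line "PCMK_logfile=/tmp/x", first/last
+ * delimit "PCMK_logfile"; lines such as "2BAD=x" or "=x" are rejected,
+ * because a valid name must start with a letter or underscore.
+ */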
+
+static void
+load_env_vars(const char *filename)
+{
+ /* We haven't forked or initialized logging yet, so don't leave any file
+ * descriptors open, and don't log -- silently ignore errors.
+ */
+ FILE *fp = fopen(filename, "r");
+
+ if (fp != NULL) {
+ char line[LINE_MAX] = { '\0', };
+
+ while (fgets(line, LINE_MAX, fp) != NULL) {
+ char *name = NULL;
+ char *end = NULL;
+ char *value = NULL;
+ char *quote = NULL;
+
+ // Look for valid name immediately followed by equals sign
+ if (find_env_var_name(line, &name, &end) && (*++end == '=')) {
+
+ // Null-terminate name, and advance beyond equals sign
+ *end++ = '\0';
+
+ // Check whether value is quoted
+ if ((*end == '\'') || (*end == '"')) {
+ quote = end++;
+ }
+ value = end;
+
+ if (quote) {
+ /* Value is remaining characters up to next non-backslashed
+ * matching quote character.
+ */
+ while (((*end != *quote) || (*(end - 1) == '\\'))
+ && (*end != '\0')) {
+ end++;
+ }
+ if (*end == *quote) {
+ // Null-terminate value, and advance beyond close quote
+ *end++ = '\0';
+ } else {
+ // Matching closing quote wasn't found
+ value = NULL;
+ }
+
+ } else {
+ /* Value is remaining characters up to next non-backslashed
+ * whitespace.
+ */
+ while ((!isspace(*end) || (*(end - 1) == '\\'))
+ && (*end != '\0')) {
+ ++end;
+ }
+
+ if (end == (line + LINE_MAX - 1)) {
+ // Line was too long
+ value = NULL;
+ }
+ // Do NOT null-terminate value (yet)
+ }
+
+ /* We have a valid name and value, and end is now the character
+ * after the closing quote or the first whitespace after the
+ * unquoted value. Make sure the rest of the line is just
+ * whitespace or a comment.
+ */
+ if (value) {
+ char *value_end = end;
+
+ while (isspace(*end) && (*end != '\n')) {
+ ++end;
+ }
+ if ((*end == '\n') || (*end == '#')) {
+ if (quote == NULL) {
+ // Now we can null-terminate an unquoted value
+ *value_end = '\0';
+ }
+
+ // Don't overwrite (bundle options take precedence)
+ setenv(name, value, 0);
+
+ } else {
+ value = NULL;
+ }
+ }
+ }
+
+ if ((value == NULL) && (strchr(line, '\n') == NULL)) {
+ // Eat remainder of line beyond LINE_MAX
+ if (fscanf(fp, "%*[^\n]\n") == EOF) {
+ value = NULL; // Don't care, make compiler happy
+ }
+ }
+ }
+ fclose(fp);
+ }
+}
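+
+/* An example /etc/pacemaker/pcmk-init.env accepted by the parser above
+ * (hypothetical contents):
+ *
+ *   PCMK_logfile=/var/log/pcmk-init.log    # unquoted value ends at whitespace
+ *   PCMK_node_start_state="standby"        # quoted values may contain spaces
+ *
+ * Because setenv() is called with overwrite=0, values already in the
+ * environment (e.g. from the bundle configuration) take precedence.
+ */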
+
+void
+remoted_spawn_pidone(int argc, char **argv, char **envp)
+{
+ sigset_t set;
+
+ /* This environment variable exists for two purposes:
+ * - For testing, setting it to "full" enables full PID 1 behavior even
+ * when PID is not 1
+ * - Setting to "vars" enables just the loading of environment variables
+ * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or
+ * containers with a custom PID 1 script that launches pacemaker-remoted.
+ */
+ const char *pid1 = (getpid() == 1)? "full" : getenv("PCMK_remote_pid1");
+
+ if (pid1 == NULL) {
+ return;
+ }
+
+ /* When a container is launched, it may be given specific environment
+ * variables, which for Pacemaker bundles are given in the bundle
+ * configuration. However, that does not allow for host-specific values.
+ * To allow for that, look for a special file containing a shell-like syntax
+ * of name/value pairs, and export those into the environment.
+ */
+ load_env_vars("/etc/pacemaker/pcmk-init.env");
+
+ if (strcmp(pid1, "full")) {
+ return;
+ }
+
+ /* Containers can be expected to have /var/log, but they may not have
+ * /var/log/pacemaker, so use a different default if no value has been
+ * explicitly configured in the container's environment.
+ */
+ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) {
+ pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log");
+ }
+
+ sigfillset(&set);
+ sigprocmask(SIG_BLOCK, &set, 0);
+
+ main_pid = fork();
+ switch (main_pid) {
+ case 0:
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ setsid();
+ setpgid(0, 0);
+
+ // Child remains as pacemaker-remoted
+ return;
+ case -1:
+ perror("fork");
+ }
+
+ /* Parent becomes the reaper of zombie processes */
+ /* Safe to initialize logging now if needed */
+
+# ifdef HAVE_PROGNAME
+ /* Differentiate ourselves in the 'ps' output */
+ {
+ char *p;
+ int i, maxlen;
+ char *LastArgv = NULL;
+ const char *name = "pcmk-init";
+
+ for (i = 0; i < argc; i++) {
+ if (!i || (LastArgv + 1 == argv[i]))
+ LastArgv = argv[i] + strlen(argv[i]);
+ }
+
+ for (i = 0; envp[i] != NULL; i++) {
+ if ((LastArgv + 1) == envp[i]) {
+ LastArgv = envp[i] + strlen(envp[i]);
+ }
+ }
+
+ maxlen = (LastArgv - argv[0]) - 2;
+
+ i = strlen(name);
+
+ /* We can overwrite individual argv[] arguments */
+ snprintf(argv[0], maxlen, "%s", name);
+
+ /* Now zero out everything else */
+ p = &argv[0][i];
+ while (p < LastArgv) {
+ *p++ = '\0';
+ }
+ argv[1] = NULL;
+ }
+# endif // HAVE_PROGNAME
+
+ while (1) {
+ int sig;
+ size_t i;
+
+ sigwait(&set, &sig);
+ for (i = 0; i < PCMK__NELEM(sigmap); i++) {
+ if (sigmap[i].sig == sig) {
+ sigmap[i].handler();
+ break;
+ }
+ }
+ }
+}
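+
+/* Net effect (summary for illustration): when started as PID 1 (or with
+ * PCMK_remote_pid1=full), the parent blocks all signals and reaps zombies in
+ * the sigwait() loop above while the forked child carries on as
+ * pacemaker-remoted; the parent exits with the child's exit status.
+ */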
diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c
new file mode 100644
index 0000000..62c8c3a
--- /dev/null
+++ b/daemons/execd/remoted_proxy.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2012-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include "pacemaker-execd.h"
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/services.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cib/internal.h>
+#include <crm/fencing/internal.h>
+
+static qb_ipcs_service_t *cib_ro = NULL;
+static qb_ipcs_service_t *cib_rw = NULL;
+static qb_ipcs_service_t *cib_shm = NULL;
+
+static qb_ipcs_service_t *attrd_ipcs = NULL;
+static qb_ipcs_service_t *crmd_ipcs = NULL;
+static qb_ipcs_service_t *stonith_ipcs = NULL;
+static qb_ipcs_service_t *pacemakerd_ipcs = NULL;
+
+// An IPC provider is a cluster node controller connecting as a client
+static GList *ipc_providers = NULL;
+// An IPC client is a local command (such as cibadmin or crm_resource) connecting locally
+static GHashTable *ipc_clients = NULL;
+
+/*!
+ * \internal
+ * \brief Get an IPC proxy provider
+ *
+ * \return Pointer to a provider if one exists, NULL otherwise
+ *
+ * \note Grab the first provider, which is the most recent connection. That way,
+ * if we haven't yet timed out an old, failed connection, we don't try to
+ * use it.
+ */
+pcmk__client_t *
+ipc_proxy_get_provider(void)
+{
+ return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL;
+}
+
+/*!
+ * \internal
+ * \brief Accept a client connection on a proxy IPC server
+ *
+ * \param[in] c Client's IPC connection
+ * \param[in] uid Client's user ID
+ * \param[in] gid Client's group ID
+ * \param[in] ipc_channel Name of IPC server to proxy
+ *
+ * \return pcmk_ok on success, -errno on error
+ */
+static int32_t
+ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel)
+{
+ pcmk__client_t *client;
+ pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
+ xmlNode *msg;
+
+ if (ipc_proxy == NULL) {
+ crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s "
+ "because not connected to cluster", uid, gid, ipc_channel);
+ return -EREMOTEIO;
+ }
+
+ /* This new client is a local IPC client on a Pacemaker Remote controlled
+ * node, needing to access cluster node IPC services.
+ */
+ client = pcmk__new_client(c, uid, gid);
+ if (client == NULL) {
+ return -EREMOTEIO;
+ }
+
+ /* This ipc client is bound to a single ipc provider. If the
+ * provider goes away, this client is disconnected */
+ client->userdata = strdup(ipc_proxy->id);
+ client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id);
+
+ /* Allow remote executor to distinguish between proxied local clients and
+ * actual executor API clients
+ */
+ pcmk__set_client_flags(client, pcmk__client_to_proxy);
+
+ g_hash_table_insert(ipc_clients, client->id, client);
+
+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW);
+ crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ crm_debug("Accepted IPC proxy connection (session ID %s) "
+ "from uid %d gid %d on channel %s",
+ client->id, uid, gid, ipc_channel);
+ return 0;
+}
+
+static int32_t
+crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD);
+}
+
+static int32_t
+attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, T_ATTRD);
+}
+
+static int32_t
+stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, "stonith-ng");
+}
+
+static int32_t
+pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
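+    // Refuse the connection: pacemakerd IPC is not proxied for remote nodes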
+ return -EREMOTEIO;
+}
+
+static int32_t
+cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW);
+}
+
+static int32_t
+cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO);
+}
+
+void
+ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml)
+{
+ const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION);
+ const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP);
+ xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG);
+ pcmk__client_t *ipc_client;
+ int rc = pcmk_rc_ok;
+
+ /* If the IPC provider is acknowledging our shutdown request,
+ * defuse the short exit timer to give the cluster time to
+ * stop any resources we're running.
+ */
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK, pcmk__str_casei)) {
+ handle_shutdown_ack();
+ return;
+ }
+
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK, pcmk__str_casei)) {
+ handle_shutdown_nack();
+ return;
+ }
+
+ ipc_client = pcmk__find_client_by_id(session);
+ if (ipc_client == NULL) {
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ return;
+ }
+
+ /* This is an event or response from the ipc provider
+ * going to the local ipc client.
+ *
+ * Looking at the chain of events.
+ *
+ * -----remote node----------------|---- cluster node ------
+ * ipc_client <--1--> this code
+ * <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event()
+ * <--3--> ipc server
+ *
+ * This function is receiving a msg from connection 2
+ * and forwarding it to connection 1.
+ */
+
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_EVENT, pcmk__str_casei)) {
+ crm_trace("Sending event to %s", ipc_client->id);
+ rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event);
+
+ } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_RESPONSE, pcmk__str_casei)) {
+ int msg_id = 0;
+
+ crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id);
+ crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id);
+ rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE);
+
+ CRM_LOG_ASSERT(msg_id == ipc_client->request_id);
+ ipc_client->request_id = 0;
+
+ } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) {
+ qb_ipcs_disconnect(ipc_client->ipcs);
+
+ } else {
+ crm_err("Unknown ipc proxy msg type %s" , msg_type);
+ }
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d",
+ ipc_client->id, pcmk_rc_str(rc), rc);
+ }
+}
+
+static int32_t
+ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+    pcmk__client_t *client = pcmk__find_client(c);
+    pcmk__client_t *ipc_proxy = NULL;
+    xmlNode *request = NULL;
+    xmlNode *msg = NULL;
+
+    // Validate the client before dereferencing its userdata below
+    CRM_CHECK(client != NULL, crm_err("Invalid client");
+              return FALSE);
+    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
+              return FALSE);
+
+    ipc_proxy = pcmk__find_client_by_id(client->userdata);
+    if (!ipc_proxy) {
+        qb_ipcs_disconnect(client->ipcs);
+        return 0;
+    }
+
+ /* This is a request from the local ipc client going
+ * to the ipc provider.
+ *
+ * Looking at the chain of events.
+ *
+ * -----remote node----------------|---- cluster node ------
+ * ipc_client <--1--> this code
+ * <--2--> pacemaker-controld:remote_proxy_dispatch_internal()
+ * <--3--> ipc server
+ *
+ * This function is receiving a request from connection
+ * 1 and forwarding it to connection 2.
+ */
+ request = pcmk__client_data2xml(client, data, &id, &flags);
+
+ if (!request) {
+ return 0;
+ }
+
+ /* This ensures that synced request/responses happen over the event channel
+ * in the controller, allowing the controller to process the messages async.
+ */
+ pcmk__set_ipc_flags(flags, pcmk__client_name(client), crm_ipc_proxied);
+ client->request_id = id;
+
+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client));
+ crm_xml_add(msg, F_LRMD_IPC_USER, client->user);
+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id);
+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags);
+ add_message_xml(msg, F_LRMD_IPC_MSG, request);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(request);
+ free_xml(msg);
+
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Notify a proxy provider that we wish to shut down
+ *
+ * \param[in,out] ipc_proxy IPC client connection to proxy provider
+ *
+ * \return 0 on success, -1 on error
+ */
+int
+ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy)
+{
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ int rc;
+
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ);
+
+ /* We don't really have a session, but the controller needs this attribute
+ * to recognize this as proxy communication.
+ */
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, "0");
+
+ rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0;
+ free_xml(msg);
+ return rc;
+}
+
+static int32_t
+ipc_proxy_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+ pcmk__client_t *ipc_proxy;
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ ipc_proxy = pcmk__find_client_by_id(client->userdata);
+
+ crm_trace("Connection %p", c);
+
+ if (ipc_proxy) {
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ }
+
+ g_hash_table_remove(ipc_clients, client->id);
+
+ free(client->userdata);
+ client->userdata = NULL;
+ pcmk__free_client(client);
+ return 0;
+}
+
+static void
+ipc_proxy_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p", c);
+ ipc_proxy_closed(c);
+}
+
+static struct qb_ipcs_service_handlers crmd_proxy_callbacks = {
+ .connection_accept = crmd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers attrd_proxy_callbacks = {
+ .connection_accept = attrd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers stonith_proxy_callbacks = {
+ .connection_accept = stonith_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = {
+ .connection_accept = pacemakerd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = NULL,
+ .connection_closed = NULL,
+ .connection_destroyed = NULL
+};
+
+static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = {
+ .connection_accept = cib_proxy_accept_ro,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = {
+ .connection_accept = cib_proxy_accept_rw,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+void
+ipc_proxy_add_provider(pcmk__client_t *ipc_proxy)
+{
+ // Prepending ensures the most recent connection is always first
+ ipc_providers = g_list_prepend(ipc_providers, ipc_proxy);
+}
+
+void
+ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy)
+{
+ GHashTableIter iter;
+ pcmk__client_t *ipc_client = NULL;
+ char *key = NULL;
+ GList *remove_these = NULL;
+ GList *gIter = NULL;
+
+ ipc_providers = g_list_remove(ipc_providers, ipc_proxy);
+
+ g_hash_table_iter_init(&iter, ipc_clients);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) {
+ const char *proxy_id = ipc_client->userdata;
+ if (pcmk__str_eq(proxy_id, ipc_proxy->id, pcmk__str_casei)) {
+ crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.",
+ ipc_client->id, ipc_client->pid);
+ /* we can't remove during the iteration, so copy items
+ * to a list we can destroy later */
+ remove_these = g_list_append(remove_these, ipc_client);
+ }
+ }
+
+ for (gIter = remove_these; gIter != NULL; gIter = gIter->next) {
+ ipc_client = gIter->data;
+
+ // Disconnection callback will free the client here
+ qb_ipcs_disconnect(ipc_client->ipcs);
+ }
+
+ /* just frees the list, not the elements in the list */
+ g_list_free(remove_these);
+}
+
+void
+ipc_proxy_init(void)
+{
+ ipc_clients = pcmk__strkey_table(NULL, NULL);
+
+ pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro,
+ &cib_proxy_callbacks_rw);
+ pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks);
+ pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks);
+ pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks);
+ crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks);
+ if (crmd_ipcs == NULL) {
+ crm_err("Failed to create controller: exiting and inhibiting respawn");
+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled");
+ crm_exit(CRM_EX_FATAL);
+ }
+}
+
+void
+ipc_proxy_cleanup(void)
+{
+ if (ipc_providers) {
+ g_list_free(ipc_providers);
+ ipc_providers = NULL;
+ }
+ if (ipc_clients) {
+ g_hash_table_destroy(ipc_clients);
+ ipc_clients = NULL;
+ }
+ pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm);
+ qb_ipcs_destroy(attrd_ipcs);
+ qb_ipcs_destroy(stonith_ipcs);
+ qb_ipcs_destroy(pacemakerd_ipcs);
+ qb_ipcs_destroy(crmd_ipcs);
+ cib_ro = NULL;
+ cib_rw = NULL;
+ cib_shm = NULL;
+}
diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c
new file mode 100644
index 0000000..c65e3f3
--- /dev/null
+++ b/daemons/execd/remoted_tls.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/remote_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include <netdb.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+
+#include "pacemaker-execd.h"
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+
+# include <gnutls/gnutls.h>
+
+# define LRMD_REMOTE_AUTH_TIMEOUT 10000
+gnutls_psk_server_credentials_t psk_cred_s;
+gnutls_dh_params_t dh_params;
+static int ssock = -1;
+extern int lrmd_call_id;
+
+static void
+debug_log(int level, const char *str)
+{
+ fputs(str, stderr);
+}
+
+/*!
+ * \internal
+ * \brief Read (more) TLS handshake data from client
+ *
+ * \param[in,out] client IPC client doing handshake
+ *
+ * \return 0 on success or more data needed, -1 on error
+ */
+static int
+remoted__read_handshake_data(pcmk__client_t *client)
+{
+ int rc = pcmk__read_handshake_data(client);
+
+ if (rc == EAGAIN) {
+ /* No more data is available at the moment. Just return for now;
+ * we'll get invoked again once the client sends more.
+ */
+ return 0;
+ } else if (rc != pcmk_rc_ok) {
+ return -1;
+ }
+
+ if (client->remote->auth_timeout) {
+ g_source_remove(client->remote->auth_timeout);
+ }
+ client->remote->auth_timeout = 0;
+
+ pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete);
+ crm_notice("Remote client connection accepted");
+
+ /* Only a client with access to the TLS key can connect, so we can treat
+ * it as privileged.
+ */
+ pcmk__set_client_flags(client, pcmk__client_privileged);
+
+ // Alert other clients of the new connection
+ notify_of_new_client(client);
+ return 0;
+}
+
+static int
+lrmd_remote_client_msg(gpointer data)
+{
+ int id = 0;
+ int rc;
+ xmlNode *request = NULL;
+ pcmk__client_t *client = data;
+
+ if (!pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+ return remoted__read_handshake_data(client);
+ }
+
+ switch (pcmk__remote_ready(client->remote, 0)) {
+ case pcmk_rc_ok:
+ break;
+ case ETIME: // No message available to read
+ return 0;
+ default: // Error
+ crm_info("Remote client disconnected while polling it");
+ return -1;
+ }
+
+ rc = pcmk__read_remote_message(client->remote, -1);
+
+ request = pcmk__remote_message_xml(client->remote);
+ while (request) {
+ crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id);
+ crm_trace("Processing remote client request %d", id);
+ if (!client->name) {
+ const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
+
+ if (value) {
+ client->name = strdup(value);
+ }
+ }
+
+ lrmd_call_id++;
+ if (lrmd_call_id < 1) {
+ lrmd_call_id = 1;
+ }
+
+ crm_xml_add(request, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
+ crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
+
+ process_lrmd_message(client, id, request);
+ free_xml(request);
+
+ /* process all the messages in the current buffer */
+ request = pcmk__remote_message_xml(client->remote);
+ }
+
+ if (rc == ENOTCONN) {
+ crm_info("Remote client disconnected while reading from it");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+lrmd_remote_client_destroy(gpointer user_data)
+{
+ pcmk__client_t *client = user_data;
+
+ if (client == NULL) {
+ return;
+ }
+
+ crm_notice("Cleaning up after remote client %s disconnected",
+ pcmk__client_name(client));
+
+ ipc_proxy_remove_provider(client);
+
+ /* if this is the last remote connection, stop recurring
+ * operations */
+ if (pcmk__ipc_client_count() == 1) {
+ client_disconnect_cleanup(NULL);
+ }
+
+ if (client->remote->tls_session) {
+ void *sock_ptr;
+ int csock;
+
+ sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session);
+ csock = GPOINTER_TO_INT(sock_ptr);
+
+ gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR);
+ gnutls_deinit(*client->remote->tls_session);
+ gnutls_free(client->remote->tls_session);
+ close(csock);
+ }
+
+ lrmd_client_destroy(client);
+ return;
+}
+
+static gboolean
+lrmd_auth_timeout_cb(gpointer data)
+{
+ pcmk__client_t *client = data;
+
+ client->remote->auth_timeout = 0;
+
+ if (pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+ return FALSE;
+ }
+
+ mainloop_del_fd(client->remote->source);
+ client->remote->source = NULL;
+ crm_err("Remote client authentication timed out");
+
+ return FALSE;
+}
+
+// Dispatch callback for remote server socket
+static int
+lrmd_remote_listen(gpointer data)
+{
+ int csock = -1;
+ gnutls_session_t *session = NULL;
+ pcmk__client_t *new_client = NULL;
+
+ // For client socket
+ static struct mainloop_fd_callbacks lrmd_remote_fd_cb = {
+ .dispatch = lrmd_remote_client_msg,
+ .destroy = lrmd_remote_client_destroy,
+ };
+
+ CRM_CHECK(ssock >= 0, return TRUE);
+
+ if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) {
+ return TRUE;
+ }
+
+ session = pcmk__new_tls_session(csock, GNUTLS_SERVER, GNUTLS_CRD_PSK,
+ psk_cred_s);
+ if (session == NULL) {
+ close(csock);
+ return TRUE;
+ }
+
+ new_client = pcmk__new_unauth_client(NULL);
+ new_client->remote = calloc(1, sizeof(pcmk__remote_t));
+ pcmk__set_client_flags(new_client, pcmk__client_tls);
+ new_client->remote->tls_session = session;
+
+ // Require the client to authenticate within this time
+ new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT,
+ lrmd_auth_timeout_cb,
+ new_client);
+ crm_info("Remote client pending authentication "
+ CRM_XS " %p id: %s", new_client, new_client->id);
+
+ new_client->remote->source =
+ mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock,
+ new_client, &lrmd_remote_fd_cb);
+ return TRUE;
+}
+
+static void
+tls_server_dropped(gpointer user_data)
+{
+ crm_notice("TLS server session ended");
+ return;
+}
+
+// \return 0 on success, -1 on error (gnutls_psk_server_credentials_function)
+static int
+lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key)
+{
+ return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1;
+}
+
+static int
+bind_and_listen(struct addrinfo *addr)
+{
+ int optval;
+ int fd;
+ int rc;
+ char buffer[INET6_ADDRSTRLEN] = { 0, };
+
+ pcmk__sockaddr2str(addr->ai_addr, buffer);
+ crm_trace("Attempting to bind to address %s", buffer);
+
+ fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+ if (fd < 0) {
+ crm_perror(LOG_ERR, "Listener socket creation failed");
+ return -1;
+ }
+
+ /* reuse address */
+ optval = 1;
+ rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
+ if (rc < 0) {
+ crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer);
+ close(fd);
+ return -1;
+ }
+
+ if (addr->ai_family == AF_INET6) {
+ optval = 0;
+ rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval));
+ if (rc < 0) {
+ crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer);
+ close(fd);
+ return -1;
+ }
+ }
+
+ if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) {
+ crm_perror(LOG_ERR, "Cannot bind to %s", buffer);
+ close(fd);
+ return -1;
+ }
+
+ if (listen(fd, 10) == -1) {
+ crm_perror(LOG_ERR, "Cannot listen on %s", buffer);
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+
+static int
+get_address_info(const char *bind_name, int port, struct addrinfo **res)
+{
+ int rc;
+ char port_str[6]; // at most "65535"
+ struct addrinfo hints;
+
+ memset(&hints, 0, sizeof(struct addrinfo));
+ hints.ai_flags = AI_PASSIVE;
+ hints.ai_family = AF_UNSPEC; // IPv6 or IPv4
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+
+ snprintf(port_str, sizeof(port_str), "%d", port);
+ rc = getaddrinfo(bind_name, port_str, &hints, res);
+ if (rc) {
+ crm_err("Unable to get IP address(es) for %s: %s",
+ (bind_name? bind_name : "local node"), gai_strerror(rc));
+ return -EADDRNOTAVAIL;
+ }
+ return pcmk_ok;
+}
+
+int
+lrmd_init_remote_tls_server(void)
+{
+ int filter;
+ int port = crm_default_remote_port();
+ struct addrinfo *res = NULL, *iter;
+ gnutls_datum_t psk_key = { NULL, 0 };
+ const char *bind_name = getenv("PCMK_remote_address");
+
+ static struct mainloop_fd_callbacks remote_listen_fd_callbacks = {
+ .dispatch = lrmd_remote_listen,
+ .destroy = tls_server_dropped,
+ };
+
+ CRM_CHECK(ssock == -1, return ssock);
+
+ crm_debug("Starting TLS listener on %s port %d",
+ (bind_name? bind_name : "all addresses on"), port);
+ crm_gnutls_global_init();
+ gnutls_global_set_log_function(debug_log);
+
+ if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) {
+ return -1;
+ }
+ gnutls_psk_allocate_server_credentials(&psk_cred_s);
+ gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb);
+ gnutls_psk_set_server_dh_params(psk_cred_s, dh_params);
+
+ /* The key callback won't get called until the first client connection
+ * attempt. Do it once here, so we can warn the user at start-up if we can't
+ * read the key. We don't error out, though, because it's fine if the key is
+ * going to be added later.
+ */
+ if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) {
+ crm_warn("A cluster connection will not be possible until the key is available");
+ }
+ gnutls_free(psk_key.data);
+
+ if (get_address_info(bind_name, port, &res) != pcmk_ok) {
+ return -1;
+ }
+
+ /* Currently we listen on only one address from the resulting list (the
+ * first IPv6 address we can bind to if possible, otherwise the first IPv4
+ * address we can bind to). When bind_name is NULL, this should be the
+ * respective wildcard address.
+ *
+ * @TODO If there is demand for specifying more than one address, allow
+ * bind_name to be a space-separated list, call getaddrinfo() for each,
+ * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets
+ * since IPv4 listeners will have their own sockets).
+ */
+ iter = res;
+ filter = AF_INET6;
+ while (iter) {
+ if (iter->ai_family == filter) {
+ ssock = bind_and_listen(iter);
+ }
+ if (ssock != -1) {
+ break;
+ }
+
+ iter = iter->ai_next;
+ if (iter == NULL && filter == AF_INET6) {
+ iter = res;
+ filter = AF_INET;
+ }
+ }
+
+ if (ssock >= 0) {
+ mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock,
+ NULL, &remote_listen_fd_callbacks);
+ crm_debug("Started TLS listener on %s port %d",
+ (bind_name? bind_name : "all addresses on"), port);
+ }
+ freeaddrinfo(res);
+ return ssock;
+}
+
+void
+execd_stop_tls_server(void)
+{
+ if (psk_cred_s) {
+ gnutls_psk_free_server_credentials(psk_cred_s);
+ psk_cred_s = NULL;
+ }
+
+ if (ssock >= 0) {
+ close(ssock);
+ ssock = -1;
+ }
+}
+#endif
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
new file mode 100644
index 0000000..2ca0088
--- /dev/null
+++ b/daemons/fenced/Makefile.am
@@ -0,0 +1,52 @@
+#
+# Original Author: Sun Jiang Dong <sunjd@cn.ibm.com>
+# Copyright 2004 International Business Machines
+#
+# with later changes copyright 2004-2023 the Pacemaker project contributors.
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-fenced cts-fence-helper
+
+noinst_HEADERS = pacemaker-fenced.h
+
+if BUILD_XML_HELP
+man7_MANS = pacemaker-fenced.7
+endif
+
+cts_fence_helper_SOURCES = cts-fence-helper.c
+cts_fence_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/fencing/libstonithd.la
+
+pacemaker_fenced_YFLAGS = -d
+pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(top_builddir)/lib/fencing/libstonithd.la \
+ $(top_builddir)/lib/pengine/libpe_status.la \
+ $(top_builddir)/lib/pacemaker/libpacemaker.la \
+ $(CLUSTERLIBS)
+pacemaker_fenced_SOURCES = pacemaker-fenced.c \
+ fenced_commands.c \
+ fenced_remote.c \
+ fenced_history.c
+
+CLEANFILES = $(man7_MANS) $(man8_MANS)
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd
+endif
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
new file mode 100644
index 0000000..e18a1f4
--- /dev/null
+++ b/daemons/fenced/cts-fence-helper.c
@@ -0,0 +1,681 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/agents.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/xml.h>
+
+#include <crm/common/mainloop.h>
+
+#define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events"
+
+static GMainLoop *mainloop = NULL;
+static crm_trigger_t *trig = NULL;
+static int mainloop_iter = 0;
+static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+typedef void (*mainloop_test_iteration_cb) (int check_event);
+
+#define MAINLOOP_DEFAULT_TIMEOUT 2
+
+enum test_modes {
+ test_standard = 0, // test using a specific developer environment
+ test_passive, // watch notifications only
+ test_api_sanity, // sanity-test stonith client API using fence_dummy
+ test_api_mainloop, // sanity-test mainloop code with async responses
+};
+
+struct {
+ enum test_modes mode;
+} options = {
+ .mode = test_standard
+};
+
+static gboolean
+mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) {
+ options.mode = test_api_mainloop;
+ } else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) {
+ options.mode = test_api_sanity;
+ } else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) {
+ options.mode = test_passive;
+ }
+
+ return TRUE;
+}
+
+static GOptionEntry entries[] = {
+ { "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
+ NULL, NULL,
+ },
+
+ { "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
+ NULL, NULL,
+ },
+
+ { "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
+ NULL, NULL,
+ },
+
+ { NULL }
+};
+
+static stonith_t *st = NULL;
+static struct pollfd pollfd;
+static const int st_opts = st_opt_sync_call;
+static int expected_notifications = 0;
+static int verbose = 0;
+
+static void
+mainloop_test_done(const char *origin, bool pass)
+{
+ if (pass) {
+ crm_info("SUCCESS - %s", origin);
+ mainloop_iter++;
+ mainloop_set_trigger(trig);
+ result.execution_status = PCMK_EXEC_DONE;
+ result.exit_status = CRM_EX_OK;
+ } else {
+ crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status,
+ pcmk_exec_status_str(result.execution_status));
+ crm_exit(CRM_EX_ERROR);
+ }
+}
+
+
+static void
+dispatch_helper(int timeout)
+{
+ int rc;
+
+ crm_debug("Looking for notification");
+ pollfd.events = POLLIN;
+ while (true) {
+ rc = poll(&pollfd, 1, timeout); /* timeout is in milliseconds; -1 means wait forever */
+ if (rc > 0) {
+ if (!stonith_dispatch(st)) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+}
+
+static void
+st_callback(stonith_t * st, stonith_event_t * e)
+{
+ char *desc = NULL;
+
+ if (st->state == stonith_disconnected) {
+ crm_exit(CRM_EX_DISCONNECT);
+ }
+
+ desc = stonith__event_description(e);
+ crm_notice("%s", desc);
+ free(desc);
+
+ if (expected_notifications) {
+ expected_notifications--;
+ }
+}
+
+static void
+st_global_callback(stonith_t * stonith, stonith_callback_data_t * data)
+{
+ crm_notice("Call %d exited %d: %s (%s)",
+ data->call_id, stonith__exit_status(data),
+ stonith__execution_status(data),
+ pcmk__s(stonith__exit_reason(data), "unspecified reason"));
+}
+
+static void
+passive_test(void)
+{
+ int rc = 0;
+
+ rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
+ st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
+ st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
+ st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
+ st_global_callback);
+
+ dispatch_helper(600 * 1000);
+}
+
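+/* Run one synchronous fencer API call, then verify both its return code and
+ * the number of notifications it triggers. A usage sketch (arguments mirror
+ * the actual tests below):
+ *
+ * single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0);
+ */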
+#define single_test(cmd, str, num_notifications, expected_rc) \
+{ \
+ int rc = 0; \
+ rc = cmd; \
+ expected_notifications = 0; \
+ if (num_notifications) { \
+ expected_notifications = num_notifications; \
+ dispatch_helper(500); \
+ } \
+ if (rc != expected_rc) { \
+ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \
+ crm_exit(CRM_EX_ERROR); \
+ } else if (expected_notifications) { \
+ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \
+ num_notifications, num_notifications - expected_notifications, str); \
+ crm_exit(CRM_EX_ERROR); \
+ } else { \
+ if (verbose) { \
+ crm_info("SUCCESS - %s: %d", str, rc); \
+ } else { \
+ crm_debug("SUCCESS - %s: %d", str, rc); \
+ } \
+ } \
+}
+
+static void
+run_fence_failure_test(void)
+{
+ stonith_key_value_t *params = NULL;
+
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith_key_value_add(params, "mode", "fail");
+
+ single_test(st->
+ cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
+ "Register device1 for failure test", 1, 0);
+
+ single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
+ "Fence failure results off", 1, -ENODATA);
+
+ single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0),
+ "Fence failure results reboot", 1, -ENODATA);
+
+ single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
+ "Remove device1 for failure test", 1, 0);
+
+ stonith_key_value_freeall(params, 1, 1);
+}
+
+static void
+run_fence_failure_rollover_test(void)
+{
+ stonith_key_value_t *params = NULL;
+
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith_key_value_add(params, "mode", "fail");
+
+ single_test(st->
+ cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
+ "Register device1 for rollover test", 1, 0);
+ stonith_key_value_freeall(params, 1, 1);
+ params = NULL;
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith_key_value_add(params, "mode", "pass");
+
+ single_test(st->
+ cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params),
+ "Register device2 for rollover test", 1, 0);
+
+ single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
+ "Fence rollover results off", 1, 0);
+
+ /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
+ single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0),
+ "Fence rollover results on", 1, -ENODEV);
+
+ single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
+ "Remove device1 for rollover tests", 1, 0);
+
+ single_test(st->cmds->remove_device(st, st_opts, "test-id2"),
+ "Remove device2 for rollover tests", 1, 0);
+
+ stonith_key_value_freeall(params, 1, 1);
+}
+
+static void
+run_standard_test(void)
+{
+ stonith_key_value_t *params = NULL;
+
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith_key_value_add(params, "mode", "pass");
+ params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2");
+
+ single_test(st->
+ cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params),
+ "Register", 1, 0);
+ stonith_key_value_freeall(params, 1, 1);
+ params = NULL;
+
+ single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0);
+
+ single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0);
+
+ single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1),
+ "Status false_1_node2", 1, 0);
+
+ single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1),
+ "Status false_1_node1", 1, 0);
+
+ single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0),
+ "Fence unknown-host (expected failure)", 0, -ENODEV);
+
+ single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0),
+ "Fence false_1_node1", 1, 0);
+
+ /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
+ single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0),
+ "Unfence false_1_node1", 1, -ENODEV);
+
+ /* Confirm that an invalid level index is rejected */
+ single_test(st->cmds->register_level(st, st_opts, "node1", 999, params),
+ "Attempt to register an invalid level index", 0, -EINVAL);
+
+ single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0);
+
+ stonith_key_value_freeall(params, 1, 1);
+}
+
+static void
+sanity_tests(void)
+{
+ int rc = 0;
+
+ rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
+ st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
+ st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
+ st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
+ st_global_callback);
+
+ crm_info("Starting API Sanity Tests");
+ run_standard_test();
+ run_fence_failure_test();
+ run_fence_failure_rollover_test();
+ crm_info("Sanity Tests Passed");
+}
+
+static void
+standard_dev_test(void)
+{
+ int rc = 0;
+ char *tmp = NULL;
+ stonith_key_value_t *params = NULL;
+
+ rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
+
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "some-host=pcmk-7 true_1_node1=3,4");
+
+ rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params);
+ crm_debug("Register: %d", rc);
+
+ rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10);
+ crm_debug("List: %d output: %s", rc, tmp ? tmp : "<none>");
+
+ rc = st->cmds->monitor(st, st_opts, "test-id", 10);
+ crm_debug("Monitor: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10);
+ crm_debug("Status false_1_node2: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
+ crm_debug("Status false_1_node1: %d", rc);
+
+ rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0);
+ crm_debug("Fence unknown-host: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
+ crm_debug("Status false_1_node1: %d", rc);
+
+ rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0);
+ crm_debug("Fence false_1_node1: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
+ crm_debug("Status false_1_node1: %d", rc);
+
+ rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0);
+ crm_debug("Unfence false_1_node1: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
+ crm_debug("Status false_1_node1: %d", rc);
+
+ rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0);
+ crm_debug("Fence alias: %d", rc);
+
+ rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10);
+ crm_debug("Status alias: %d", rc);
+
+ rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0);
+ crm_debug("Unfence false_1_node1: %d", rc);
+
+ rc = st->cmds->remove_device(st, st_opts, "test-id");
+ crm_debug("Remove test-id: %d", rc);
+
+ stonith_key_value_freeall(params, 1, 1);
+}
+
+static void iterate_mainloop_tests(gboolean event_ready);
+
+static void
+mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data)
+{
+ pcmk__set_result(&result, stonith__exit_status(data),
+ stonith__execution_status(data),
+ stonith__exit_reason(data));
+ iterate_mainloop_tests(TRUE);
+}
+
+static int
+register_callback_helper(int callid)
+{
+ return st->cmds->register_callback(st,
+ callid,
+ MAINLOOP_DEFAULT_TIMEOUT,
+ st_opt_timeout_updates, NULL, "callback", mainloop_callback);
+}
+
+static void
+test_async_fence_pass(int check_event)
+{
+ int rc = 0;
+
+ if (check_event) {
+ mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK));
+ return;
+ }
+
+ rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+ if (rc < 0) {
+ crm_err("fence failed with rc %d", rc);
+ mainloop_test_done(__func__, false);
+ }
+ register_callback_helper(rc);
+ /* wait for event */
+}
+
+#define CUSTOM_TIMEOUT_ADDITION 10
+static void
+test_async_fence_custom_timeout(int check_event)
+{
+ int rc = 0;
+ static time_t begin = 0;
+
+ if (check_event) {
+ uint32_t diff = (time(NULL) - begin);
+
+ if (result.execution_status != PCMK_EXEC_TIMEOUT) {
+ mainloop_test_done(__func__, false);
+ } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) {
+ crm_err("Custom timeout test failed: callback expiration should have been "
+ "updated to %d seconds, but it expired after %u seconds",
+ CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff);
+ mainloop_test_done(__func__, false);
+ } else {
+ mainloop_test_done(__func__, true);
+ }
+ return;
+ }
+ begin = time(NULL);
+
+ rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+ if (rc < 0) {
+ crm_err("fence failed with rc %d", rc);
+ mainloop_test_done(__func__, false);
+ }
+ register_callback_helper(rc);
+ /* wait for event */
+}
+
+static void
+test_async_fence_timeout(int check_event)
+{
+ int rc = 0;
+
+ if (check_event) {
+ mainloop_test_done(__func__,
+ (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE));
+ return;
+ }
+
+ rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+ if (rc < 0) {
+ crm_err("fence failed with rc %d", rc);
+ mainloop_test_done(__func__, false);
+ }
+ register_callback_helper(rc);
+ /* wait for event */
+}
+
+static void
+test_async_monitor(int check_event)
+{
+ int rc = 0;
+
+ if (check_event) {
+ mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK));
+ return;
+ }
+
+ rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT);
+ if (rc < 0) {
+ crm_err("monitor failed with rc %d", rc);
+ mainloop_test_done(__func__, false);
+ }
+
+ register_callback_helper(rc);
+ /* wait for event */
+}
+
+static void
+test_register_async_devices(int check_event)
+{
+ char buf[16] = { 0, };
+ stonith_key_value_t *params = NULL;
+
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2");
+ params = stonith_key_value_add(params, "mode", "fail");
+ st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params);
+ stonith_key_value_freeall(params, 1, 1);
+
+ params = NULL;
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "true_1_node1=1,2");
+ params = stonith_key_value_add(params, "mode", "pass");
+ st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params);
+ stonith_key_value_freeall(params, 1, 1);
+
+ params = NULL;
+ params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "custom_timeout_node1=1,2");
+ params = stonith_key_value_add(params, "mode", "fail");
+ params = stonith_key_value_add(params, "delay", "1000");
+ snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION);
+ params = stonith_key_value_add(params, "pcmk_off_timeout", buf);
+ st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy",
+ params);
+ stonith_key_value_freeall(params, 1, 1);
+
+ mainloop_test_done(__func__, true);
+}
+
+static void
+try_mainloop_connect(int check_event)
+{
+ int rc = stonith_api_connect_retry(st, crm_system_name, 10);
+
+ if (rc == pcmk_ok) {
+ mainloop_test_done(__func__, true);
+ return;
+ }
+ crm_err("API CONNECTION FAILURE");
+ mainloop_test_done(__func__, false);
+}
+
+static void
+iterate_mainloop_tests(gboolean event_ready)
+{
+ static mainloop_test_iteration_cb callbacks[] = {
+ try_mainloop_connect,
+ test_register_async_devices,
+ test_async_monitor,
+ test_async_fence_pass,
+ test_async_fence_timeout,
+ test_async_fence_custom_timeout,
+ };
+
+ if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) {
+ /* all tests ran, everything passed */
+ crm_info("ALL MAINLOOP TESTS PASSED!");
+ crm_exit(CRM_EX_OK);
+ }
+
+ callbacks[mainloop_iter] (event_ready);
+}
+
+static gboolean
+trigger_iterate_mainloop_tests(gpointer user_data)
+{
+ iterate_mainloop_tests(FALSE);
+ return TRUE;
+}
+
+static void
+test_shutdown(int nsig)
+{
+ int rc = 0;
+
+ if (st) {
+ rc = st->cmds->disconnect(st);
+ crm_info("Disconnect: %d", rc);
+
+ crm_debug("Destroy");
+ stonith_api_delete(st);
+ }
+
+ if (rc) {
+ crm_exit(CRM_EX_ERROR);
+ }
+}
+
+static void
+mainloop_tests(void)
+{
+ trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL);
+ mainloop_set_trigger(trig);
+ mainloop_add_signal(SIGTERM, test_shutdown);
+
+ crm_info("Starting");
+ mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(mainloop);
+}
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, NULL, group, NULL);
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ GError *error = NULL;
+ crm_exit_t exit_code = CRM_EX_OK;
+
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, NULL);
+
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ /* We have to use crm_log_init here to set up the logging because there's
+ * different handling for daemons vs. command line programs, and
+ * pcmk__cli_init_logging is set up to only handle the latter.
+ */
+ crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv,
+ FALSE);
+
+ for (int i = 0; i < args->verbosity; i++) {
+ crm_bump_log_level(argc, argv);
+ }
+
+ st = stonith_api_new();
+ if (st == NULL) {
+ exit_code = CRM_EX_DISCONNECT;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Could not connect to fencer: API memory allocation failed");
+ goto done;
+ }
+
+ switch (options.mode) {
+ case test_standard:
+ standard_dev_test();
+ break;
+ case test_passive:
+ passive_test();
+ break;
+ case test_api_sanity:
+ sanity_tests();
+ break;
+ case test_api_mainloop:
+ mainloop_tests();
+ break;
+ }
+
+ test_shutdown(0);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, NULL);
+ crm_exit(exit_code);
+}
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
new file mode 100644
index 0000000..ba63cf8
--- /dev/null
+++ b/daemons/fenced/fenced_commands.c
@@ -0,0 +1,3674 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/mainloop.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-fenced.h>
+
+GHashTable *device_list = NULL;
+GHashTable *topology = NULL;
+static GList *cmd_list = NULL;
+
+static GHashTable *fenced_handlers = NULL;
+
+struct device_search_s {
+ /* target of fence action */
+ char *host;
+ /* requested fence action */
+ char *action;
+ /* timeout to use if a device is queried dynamically for possible targets */
+ int per_device_timeout;
+ /* number of registered fencing devices at time of request */
+ int replies_needed;
+ /* number of device replies received so far */
+ int replies_received;
+ /* whether the target may run the requested action (or "off", if remapping) on itself */
+ bool allow_suicide;
+
+ /* private data to pass to search callback function */
+ void *user_data;
+ /* function to call when all replies have been received */
+ void (*callback) (GList * devices, void *user_data);
+ /* devices capable of performing requested action (or off if remapping) */
+ GList *capable;
+ /* Whether to limit the search to devices that support the requested action */
+ uint32_t support_action_only;
+};
+
+static gboolean stonith_device_dispatch(gpointer user_data);
+static void st_child_done(int pid, const pcmk__action_result_t *result,
+ void *user_data);
+static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
+ pcmk__client_t *client);
+
+static void search_devices_record_result(struct device_search_s *search, const char *device,
+ gboolean can_fence);
+
+static int get_agent_metadata(const char *agent, xmlNode **metadata);
+static void read_action_metadata(stonith_device_t *device);
+static enum fenced_target_by unpack_level_kind(const xmlNode *level);
+
+typedef struct async_command_s {
+
+ int id;
+ int pid;
+ int fd_stdout;
+ int options;
+ int default_timeout; /* seconds */
+ int timeout; /* seconds */
+
+ int start_delay; // seconds (-1 means disable static/random fencing delays)
+ int delay_id;
+
+ char *op;
+ char *origin;
+ char *client;
+ char *client_name;
+ char *remote_op_id;
+
+ char *target;
+ uint32_t target_nodeid;
+ char *action;
+ char *device;
+
+ GList *device_list;
+ GList *next_device_iter; // device_list entry for next device to execute
+
+ void *internal_user_data;
+ void (*done_cb) (int pid, const pcmk__action_result_t *result,
+ void *user_data);
+ guint timer_sigterm;
+ guint timer_sigkill;
+ /*! If the operation timed out, this is the last signal
+ * we sent to the process to get it to terminate */
+ int last_timeout_signo;
+
+ stonith_device_t *active_on;
+ stonith_device_t *activating_on;
+} async_command_t;
+
+static xmlNode *construct_async_reply(const async_command_t *cmd,
+ const pcmk__action_result_t *result);
+
+static gboolean
+is_action_required(const char *action, const stonith_device_t *device)
+{
+ return (device != NULL) && device->automatic_unfencing
+ && pcmk__str_eq(action, "on", pcmk__str_none);
+}
+
+static int
+get_action_delay_max(const stonith_device_t *device, const char *action)
+{
+ const char *value = NULL;
+ int delay_max = 0;
+
+ if (!pcmk__is_fencing_action(action)) {
+ return 0;
+ }
+
+ value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
+ if (value) {
+ delay_max = crm_parse_interval_spec(value) / 1000;
+ }
+
+ return delay_max;
+}
+
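+/*!
+ * \internal
+ * \brief Get the base fencing delay for an action on a device
+ *
+ * PCMK_STONITH_DELAY_BASE may be a single interval or a per-target mapping;
+ * for example (values purely illustrative), "node1:2s;node2:5s" would select
+ * a two-second base delay when the target is node1.
+ *
+ * \param[in] device Fencing device to check
+ * \param[in] action Fencing action name
+ * \param[in] target Name of fencing target, if any
+ *
+ * \return Base delay in seconds, or 0 if none is configured
+ */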
+static int
+get_action_delay_base(const stonith_device_t *device, const char *action,
+ const char *target)
+{
+ char *hash_value = NULL;
+ int delay_base = 0;
+
+ if (!pcmk__is_fencing_action(action)) {
+ return 0;
+ }
+
+ hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
+
+ if (hash_value) {
+ char *value = strdup(hash_value);
+ char *valptr = value;
+
+ CRM_ASSERT(value != NULL);
+
+ if (target != NULL) {
+ for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
+ char *mapval = strchr(val, ':');
+
+ if (mapval == NULL || mapval[1] == 0) {
+ crm_err("pcmk_delay_base: empty value in mapping", val);
+ continue;
+ }
+
+ if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
+ value = mapval + 1;
+ crm_debug("pcmk_delay_base mapped to %s for %s",
+ value, target);
+ break;
+ }
+ }
+ }
+
+ if (strchr(value, ':') == NULL) {
+ delay_base = crm_parse_interval_spec(value) / 1000;
+ }
+
+ free(valptr);
+ }
+
+ return delay_base;
+}
+
+/*!
+ * \internal
+ * \brief Override STONITH timeout with pcmk_*_timeout if available
+ *
+ * \param[in] device STONITH device to use
+ * \param[in] action STONITH action name
+ * \param[in] default_timeout Timeout to use if device does not have
+ * a pcmk_*_timeout parameter for action
+ *
+ * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
+ * \note For consistency, it would be nice if reboot/off/on timeouts could be
+ * set the same way as start/stop/monitor timeouts, i.e. with an
+ * <operation> entry in the fencing resource configuration. However that
+ * is insufficient because fencing devices may be registered directly via
+ * the fencer's register_device() API instead of going through the CIB
+ * (e.g. stonith_admin uses it for its -R option, and the executor uses it
+ * to ensure a device is registered when a command is issued). As device
+ * properties, pcmk_*_timeout parameters can be grabbed by the fencer when
+ * the device is registered, whether by CIB change or API call.
+ */
+static int
+get_action_timeout(const stonith_device_t *device, const char *action,
+ int default_timeout)
+{
+ if (action && device && device->params) {
+ char buffer[64] = { 0, };
+ const char *value = NULL;
+
+ /* If "reboot" was requested but the device does not support it,
+ * we will remap to "off", so check timeout for "off" instead
+ */
+ if (pcmk__str_eq(action, "reboot", pcmk__str_none)
+ && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
+ crm_trace("%s doesn't support reboot, using timeout for off instead",
+ device->id);
+ action = "off";
+ }
+
+ /* If the device config specified an action-specific timeout, use it */
+ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
+ value = g_hash_table_lookup(device->params, buffer);
+ if (value) {
+ return atoi(value);
+ }
+ }
+ return default_timeout;
+}
+
+/*!
+ * \internal
+ * \brief Get the currently executing device for a fencing operation
+ *
+ * \param[in] cmd Fencing operation to check
+ *
+ * \return Currently executing device for \p cmd if any, otherwise NULL
+ */
+static stonith_device_t *
+cmd_device(const async_command_t *cmd)
+{
+ if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
+ return NULL;
+ }
+ return g_hash_table_lookup(device_list, cmd->device);
+}
+
+/*!
+ * \internal
+ * \brief Return the configured reboot action for a given device
+ *
+ * \param[in] device_id Device ID
+ *
+ * \return Configured reboot action for \p device_id
+ */
+const char *
+fenced_device_reboot_action(const char *device_id)
+{
+ const char *action = NULL;
+
+ if ((device_list != NULL) && (device_id != NULL)) {
+ stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
+
+ if ((device != NULL) && (device->params != NULL)) {
+ action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
+ }
+ }
+ return pcmk__s(action, "reboot");
+}
+
+/*!
+ * \internal
+ * \brief Check whether a given device supports the "on" action
+ *
+ * \param[in] device_id Device ID
+ *
+ * \return true if \p device_id supports "on", otherwise false
+ */
+bool
+fenced_device_supports_on(const char *device_id)
+{
+ if ((device_list != NULL) && (device_id != NULL)) {
+ stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
+
+ if (device != NULL) {
+ return pcmk_is_set(device->flags, st_device_supports_on);
+ }
+ }
+ return false;
+}
+
+static void
+free_async_command(async_command_t * cmd)
+{
+ if (!cmd) {
+ return;
+ }
+
+ if (cmd->delay_id) {
+ g_source_remove(cmd->delay_id);
+ }
+
+ cmd_list = g_list_remove(cmd_list, cmd);
+
+ g_list_free_full(cmd->device_list, free);
+ free(cmd->device);
+ free(cmd->action);
+ free(cmd->target);
+ free(cmd->remote_op_id);
+ free(cmd->client);
+ free(cmd->client_name);
+ free(cmd->origin);
+ free(cmd->op);
+ free(cmd);
+}
+
+/*!
+ * \internal
+ * \brief Create a new asynchronous fencing operation from request XML
+ *
+ * \param[in] msg Fencing request XML (from IPC or CPG)
+ *
+ * \return Newly allocated fencing operation on success, otherwise NULL
+ *
+ * \note This asserts on memory errors, so a NULL return indicates an
+ * unparseable message.
+ */
+static async_command_t *
+create_async_command(xmlNode *msg)
+{
+ xmlNode *op = NULL;
+ async_command_t *cmd = NULL;
+
+ if (msg == NULL) {
+ return NULL;
+ }
+
+ op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
+ if (op == NULL) {
+ return NULL;
+ }
+
+ cmd = calloc(1, sizeof(async_command_t));
+ CRM_ASSERT(cmd != NULL);
+
+ // All messages must include these
+ cmd->action = crm_element_value_copy(op, F_STONITH_ACTION);
+ cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
+ cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
+ if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
+ free_async_command(cmd);
+ return NULL;
+ }
+
+ crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
+ crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
+ crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
+ crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
+ cmd->timeout = cmd->default_timeout;
+
+ cmd->origin = crm_element_value_copy(msg, F_ORIG);
+ cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
+ cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
+ cmd->target = crm_element_value_copy(op, F_STONITH_TARGET);
+ cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
+
+ cmd->done_cb = st_child_done;
+
+ // Track in global command list
+ cmd_list = g_list_append(cmd_list, cmd);
+
+ return cmd;
+}
+
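+/*!
+ * \internal
+ * \brief Get the concurrent-action limit for a device
+ *
+ * Parse PCMK_STONITH_ACTION_LIMIT from the device parameters. A missing,
+ * unparseable, or zero value is treated as 1, while a negative value (for
+ * example, -1) is passed through and treated by the caller as "no limit".
+ *
+ * \param[in] device Fencing device to check
+ *
+ * \return Maximum number of concurrent actions, or negative for unlimited
+ */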
+static int
+get_action_limit(stonith_device_t * device)
+{
+ const char *value = NULL;
+ int action_limit = 1;
+
+ value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
+ if ((value == NULL)
+ || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
+ || (action_limit == 0)) {
+ action_limit = 1;
+ }
+ return action_limit;
+}
+
+static int
+get_active_cmds(stonith_device_t * device)
+{
+ int counter = 0;
+ GList *gIter = NULL;
+ GList *gIterNext = NULL;
+
+ CRM_CHECK(device != NULL, return 0);
+
+ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
+ async_command_t *cmd = gIter->data;
+
+ gIterNext = gIter->next;
+
+ if (cmd->active_on == device) {
+ counter++;
+ }
+ }
+
+ return counter;
+}
+
+static void
+fork_cb(int pid, void *user_data)
+{
+ async_command_t *cmd = (async_command_t *) user_data;
+ /* In case of a retry, the move from activating_on to active_on has
+ * already been done
+ */
+ stonith_device_t *device = cmd->activating_on? cmd->activating_on : cmd->active_on;
+
+ CRM_ASSERT(device);
+ crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
+ cmd->action, pid,
+ ((cmd->target == NULL)? "" : " targeting "),
+ pcmk__s(cmd->target, ""), device->id, cmd->timeout);
+ cmd->active_on = device;
+ cmd->activating_on = NULL;
+}
+
+static int
+get_agent_metadata_cb(gpointer data) {
+ stonith_device_t *device = data;
+ guint period_ms;
+
+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
+ case pcmk_rc_ok:
+ if (device->agent_metadata) {
+ read_action_metadata(device);
+ stonith__device_parameter_flags(&(device->flags), device->id,
+ device->agent_metadata);
+ }
+ return G_SOURCE_REMOVE;
+
+ case EAGAIN:
+ period_ms = pcmk__mainloop_timer_get_period(device->timer);
+ if (period_ms < 160 * 1000) {
+ mainloop_timer_set_period(device->timer, 2 * period_ms);
+ }
+ return G_SOURCE_CONTINUE;
+
+ default:
+ return G_SOURCE_REMOVE;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Call a command's action callback for an internal (not library) result
+ *
+ * \param[in,out] cmd Command to report result for
+ * \param[in] execution_status Execution status to use for result
+ * \param[in] exit_status Exit status to use for result
+ * \param[in] exit_reason Exit reason to use for result
+ */
+static void
+report_internal_result(async_command_t *cmd, int exit_status,
+ int execution_status, const char *exit_reason)
+{
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ pcmk__set_result(&result, exit_status, execution_status, exit_reason);
+ cmd->done_cb(0, &result, cmd);
+ pcmk__reset_result(&result);
+}
+
+static gboolean
+stonith_device_execute(stonith_device_t * device)
+{
+ int exec_rc = 0;
+ const char *action_str = NULL;
+ const char *host_arg = NULL;
+ async_command_t *cmd = NULL;
+ stonith_action_t *action = NULL;
+ int active_cmds = 0;
+ int action_limit = 0;
+ GList *gIter = NULL;
+ GList *gIterNext = NULL;
+
+ CRM_CHECK(device != NULL, return FALSE);
+
+ active_cmds = get_active_cmds(device);
+ action_limit = get_action_limit(device);
+ if (action_limit > -1 && active_cmds >= action_limit) {
+ crm_trace("%s is over its action limit of %d (%u active action%s)",
+ device->id, action_limit, active_cmds,
+ pcmk__plural_s(active_cmds));
+ return TRUE;
+ }
+
+ for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
+ async_command_t *pending_op = gIter->data;
+
+ gIterNext = gIter->next;
+
+ if (pending_op && pending_op->delay_id) {
+ crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
+ "waiting for start delay of %ds",
+ pending_op->action,
+ ((pending_op->target == NULL)? "" : " targeting "),
+ pcmk__s(pending_op->target, ""),
+ device->id, pending_op->start_delay);
+ continue;
+ }
+
+ device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
+ g_list_free_1(gIter);
+
+ cmd = pending_op;
+ break;
+ }
+
+ if (cmd == NULL) {
+ crm_trace("No actions using %s are needed", device->id);
+ return TRUE;
+ }
+
+ if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
+ if (pcmk__is_fencing_action(cmd->action)) {
+ if (node_does_watchdog_fencing(stonith_our_uname)) {
+ pcmk__panic(__func__);
+ goto done;
+ }
+ } else {
+ crm_info("Faking success for %s watchdog operation", cmd->action);
+ report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ goto done;
+ }
+ }
+
+#if SUPPORT_CIBSECRETS
+ exec_rc = pcmk__substitute_secrets(device->id, device->params);
+ if (exec_rc != pcmk_rc_ok) {
+ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_none)) {
+ crm_info("Proceeding with stop operation for %s "
+ "despite being unable to load CIB secrets (%s)",
+ device->id, pcmk_rc_str(exec_rc));
+ } else {
+ crm_err("Considering %s unconfigured "
+ "because unable to load CIB secrets: %s",
+ device->id, pcmk_rc_str(exec_rc));
+ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
+ "Failed to get CIB secrets");
+ goto done;
+ }
+ }
+#endif
+
+ action_str = cmd->action;
+ if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_none)
+ && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
+
+ crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
+ "because agent '%s' does not support reboot",
+ ((cmd->target == NULL)? "" : " targeting "),
+ pcmk__s(cmd->target, ""), device->id, device->agent);
+ action_str = "off";
+ }
+
+ if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
+ host_arg = "port";
+
+ } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
+ host_arg = "plug";
+ }
+
+ action = stonith__action_create(device->agent, action_str, cmd->target,
+ cmd->target_nodeid, cmd->timeout,
+ device->params, device->aliases, host_arg);
+
+ /* For async execution, exec_rc is negative for an early error exit;
+ * otherwise, success and errors are handled via callbacks
+ */
+ cmd->activating_on = device;
+ exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
+ fork_cb);
+ if (exec_rc < 0) {
+ cmd->activating_on = NULL;
+ cmd->done_cb(0, stonith__action_result(action), cmd);
+ stonith__destroy_action(action);
+ }
+
+done:
+ /* Device might get triggered to work by multiple fencing commands
+ * simultaneously. Trigger the device again to make sure any
+ * remaining concurrent commands get executed. */
+ if (device->pending_ops) {
+ mainloop_set_trigger(device->work);
+ }
+ return TRUE;
+}
+
+static gboolean
+stonith_device_dispatch(gpointer user_data)
+{
+ return stonith_device_execute(user_data);
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+ async_command_t *cmd = data;
+ stonith_device_t *device = cmd_device(cmd);
+
+ cmd->delay_id = 0;
+ if (device) {
+ mainloop_set_trigger(device->work);
+ }
+
+ return FALSE;
+}
+
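+/*!
+ * \internal
+ * \brief Queue a fencing command for execution by a device
+ *
+ * Apply the device's action-specific timeout to the command, append it to the
+ * device's pending operations, and (unless the requested delay is -1) apply
+ * any configured static or random fencing delay before it may execute.
+ *
+ * \param[in,out] cmd Fencing command to schedule
+ * \param[in,out] device Device to execute \p cmd
+ */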
+static void
+schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
+{
+ int delay_max = 0;
+ int delay_base = 0;
+ int requested_delay = cmd->start_delay;
+
+ CRM_CHECK(cmd != NULL, return);
+ CRM_CHECK(device != NULL, return);
+
+ if (cmd->device) {
+ free(cmd->device);
+ }
+
+ if (device->include_nodeid && (cmd->target != NULL)) {
+ crm_node_t *node = crm_get_peer(0, cmd->target);
+
+ cmd->target_nodeid = node->id;
+ }
+
+ cmd->device = strdup(device->id);
+ cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
+
+ if (cmd->remote_op_id) {
+ crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
+ "with op id %.8s and timeout %ds",
+ cmd->action,
+ (cmd->target == NULL)? "" : " targeting ",
+ pcmk__s(cmd->target, ""),
+ device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
+ } else {
+ crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
+ cmd->action,
+ (cmd->target == NULL)? "" : " targeting ",
+ pcmk__s(cmd->target, ""),
+ device->id, cmd->client, cmd->timeout);
+ }
+
+ device->pending_ops = g_list_append(device->pending_ops, cmd);
+ mainloop_set_trigger(device->work);
+
+ // Value -1 means disable any static/random fencing delays
+ if (requested_delay < 0) {
+ return;
+ }
+
+ delay_max = get_action_delay_max(device, cmd->action);
+ delay_base = get_action_delay_base(device, cmd->action, cmd->target);
+ if (delay_max == 0) {
+ delay_max = delay_base;
+ }
+ if (delay_max < delay_base) {
+ crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
+ PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
+ "(limiting to maximum delay)",
+ delay_base, delay_max, cmd->action, device->id);
+ delay_base = delay_max;
+ }
+ if (delay_max > 0) {
+ // coverity[dont_call] We're not using rand() for security
+ cmd->start_delay +=
+ ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ + delay_base;
+ }
+
+ if (cmd->start_delay > 0) {
+ crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
+ " timeout=%ds requested_delay=%ds base=%ds max=%ds",
+ cmd->action,
+ (cmd->target == NULL)? "" : " targeting ",
+ pcmk__s(cmd->target, ""),
+ device->id, cmd->start_delay, cmd->timeout,
+ requested_delay, delay_base, delay_max);
+ cmd->delay_id =
+ g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
+ }
+}
+
+static void
+free_device(gpointer data)
+{
+ GList *gIter = NULL;
+ stonith_device_t *device = data;
+
+ g_hash_table_destroy(device->params);
+ g_hash_table_destroy(device->aliases);
+
+ for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
+ async_command_t *cmd = gIter->data;
+
+ crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
+ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "Device was removed before action could be executed");
+ }
+ g_list_free(device->pending_ops);
+
+ g_list_free_full(device->targets, free);
+
+ if (device->timer) {
+ mainloop_timer_stop(device->timer);
+ mainloop_timer_del(device->timer);
+ }
+
+ mainloop_destroy_trigger(device->work);
+
+ free_xml(device->agent_metadata);
+ free(device->namespace);
+ if (device->on_target_actions != NULL) {
+ g_string_free(device->on_target_actions, TRUE);
+ }
+ free(device->agent);
+ free(device->id);
+ free(device);
+}
+
+void
+free_device_list(void)
+{
+ if (device_list != NULL) {
+ g_hash_table_destroy(device_list);
+ device_list = NULL;
+ }
+}
+
+void
+init_device_list(void)
+{
+ if (device_list == NULL) {
+ device_list = pcmk__strkey_table(NULL, free_device);
+ }
+}
+
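+/*!
+ * \internal
+ * \brief Build a table of node-to-port aliases from a pcmk_host_map value
+ *
+ * Entries are separated by semicolons or whitespace, with '=' or ':' between
+ * node name and port value; for example (values purely illustrative),
+ * "node1=1,2 node2=3,4" maps node1 to ports "1,2" and node2 to ports "3,4",
+ * as in the cts-fence-helper tests.
+ *
+ * \param[in] hostmap Host map string to parse (may be NULL)
+ * \param[out] targets If not NULL, list to append mapped port values to
+ *
+ * \return Newly created table of aliases (with case-insensitive keys)
+ */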
+static GHashTable *
+build_port_aliases(const char *hostmap, GList ** targets)
+{
+ char *name = NULL;
+ int last = 0, lpc = 0, max = 0, added = 0;
+ GHashTable *aliases = pcmk__strikey_table(free, free);
+
+ if (hostmap == NULL) {
+ return aliases;
+ }
+
+ max = strlen(hostmap);
+ for (; lpc <= max; lpc++) {
+ switch (hostmap[lpc]) {
+ /* Skip escaped chars */
+ case '\\':
+ lpc++;
+ break;
+
+ /* Assignment chars */
+ case '=':
+ case ':':
+ if (lpc > last) {
+ free(name);
+ name = calloc(1, 1 + lpc - last);
+ memcpy(name, hostmap + last, lpc - last);
+ }
+ last = lpc + 1;
+ break;
+
+ /* Delimiter chars */
+ /* case ',': Potentially used to specify multiple ports */
+ case 0:
+ case ';':
+ case ' ':
+ case '\t':
+ if (name) {
+ char *value = NULL;
+ int k = 0;
+
+ value = calloc(1, 1 + lpc - last);
+ memcpy(value, hostmap + last, lpc - last);
+
+ for (int i = 0; value[i] != '\0'; i++) {
+ if (value[i] != '\\') {
+ value[k++] = value[i];
+ }
+ }
+ value[k] = '\0';
+
+ crm_debug("Adding alias '%s'='%s'", name, value);
+ g_hash_table_replace(aliases, name, value);
+ if (targets) {
+ *targets = g_list_append(*targets, strdup(value));
+ }
+ value = NULL;
+ name = NULL;
+ added++;
+
+ } else if (lpc > last) {
+ crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
+ }
+
+ last = lpc + 1;
+ break;
+ }
+
+ if (hostmap[lpc] == 0) {
+ break;
+ }
+ }
+
+ if (added == 0) {
+ crm_info("No host mappings detected in '%s'", hostmap);
+ }
+
+ free(name);
+ return aliases;
+}
+
+GHashTable *metadata_cache = NULL;
+
+void
+free_metadata_cache(void) {
+ if (metadata_cache != NULL) {
+ g_hash_table_destroy(metadata_cache);
+ metadata_cache = NULL;
+ }
+}
+
+static void
+init_metadata_cache(void) {
+ if (metadata_cache == NULL) {
+ metadata_cache = pcmk__strkey_table(free, free);
+ }
+}
+
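+/*!
+ * \internal
+ * \brief Get a fence agent's metadata, using a process-wide cache
+ *
+ * \param[in] agent Fence agent name
+ * \param[out] metadata Where to store the agent's metadata XML
+ * (may be set to NULL)
+ *
+ * \return pcmk_rc_ok on success, EINVAL if \p metadata is NULL, or EAGAIN if
+ * the metadata could not be retrieved yet (the caller may retry)
+ */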
+static int
+get_agent_metadata(const char *agent, xmlNode **metadata)
+{
+ char *buffer = NULL;
+
+ if (metadata == NULL) {
+ return EINVAL;
+ }
+ *metadata = NULL;
+ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
+ return pcmk_rc_ok;
+ }
+ init_metadata_cache();
+ buffer = g_hash_table_lookup(metadata_cache, agent);
+ if (buffer == NULL) {
+ stonith_t *st = stonith_api_new();
+ int rc;
+
+ if (st == NULL) {
+ crm_warn("Could not get agent meta-data: "
+ "API memory allocation failed");
+ return EAGAIN;
+ }
+ rc = st->cmds->metadata(st, st_opt_sync_call, agent,
+ NULL, &buffer, 10);
+ stonith_api_delete(st);
+ if (rc || !buffer) {
+ crm_err("Could not retrieve metadata for fencing agent %s", agent);
+ return EAGAIN;
+ }
+ g_hash_table_replace(metadata_cache, strdup(agent), buffer);
+ }
+
+ *metadata = string2xml(buffer);
+ return pcmk_rc_ok;
+}
+
+static gboolean
+is_nodeid_required(xmlNode * xml)
+{
+ xmlXPathObjectPtr xpath = NULL;
+
+ if (stand_alone) {
+ return FALSE;
+ }
+
+ if (!xml) {
+ return FALSE;
+ }
+
+ xpath = xpath_search(xml, "//parameter[@name='nodeid']");
+ if (numXpathResults(xpath) <= 0) {
+ freeXpathObject(xpath);
+ return FALSE;
+ }
+
+ freeXpathObject(xpath);
+ return TRUE;
+}
+
+static void
+read_action_metadata(stonith_device_t *device)
+{
+ xmlXPathObjectPtr xpath = NULL;
+ int max = 0;
+ int lpc = 0;
+
+ if (device->agent_metadata == NULL) {
+ return;
+ }
+
+ xpath = xpath_search(device->agent_metadata, "//action");
+ max = numXpathResults(xpath);
+
+ if (max <= 0) {
+ freeXpathObject(xpath);
+ return;
+ }
+
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *action = NULL;
+ xmlNode *match = getXpathResult(xpath, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if (match == NULL) { continue; }
+
+ action = crm_element_value(match, "name");
+
+ if (pcmk__str_eq(action, "list", pcmk__str_none)) {
+ stonith__set_device_flags(device->flags, device->id,
+ st_device_supports_list);
+ } else if (pcmk__str_eq(action, "status", pcmk__str_none)) {
+ stonith__set_device_flags(device->flags, device->id,
+ st_device_supports_status);
+ } else if (pcmk__str_eq(action, "reboot", pcmk__str_none)) {
+ stonith__set_device_flags(device->flags, device->id,
+ st_device_supports_reboot);
+ } else if (pcmk__str_eq(action, "on", pcmk__str_none)) {
+ /* "automatic" means the cluster will unfence node when it joins */
+ /* "required" is a deprecated synonym for "automatic" */
+ if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) {
+ device->automatic_unfencing = TRUE;
+ }
+ stonith__set_device_flags(device->flags, device->id,
+ st_device_supports_on);
+ }
+
+ if ((action != NULL) && pcmk__xe_attr_is_true(match, "on_target")) {
+ pcmk__add_word(&(device->on_target_actions), 64, action);
+ }
+ }
+
+ freeXpathObject(xpath);
+}
+
+/*!
+ * \internal
+ * \brief Set a pcmk_*_action parameter if not already set
+ *
+ * \param[in,out] params Device parameters
+ * \param[in] action Name of action
+ * \param[in] value Value to use if action is not already set
+ */
+static void
+map_action(GHashTable *params, const char *action, const char *value)
+{
+ char *key = crm_strdup_printf("pcmk_%s_action", action);
+
+ if (g_hash_table_lookup(params, key)) {
+ crm_warn("Ignoring %s='%s', see %s instead",
+ STONITH_ATTR_ACTION_OP, value, key);
+ free(key);
+ } else {
+ crm_warn("Mapping %s='%s' to %s='%s'",
+ STONITH_ATTR_ACTION_OP, value, key, value);
+ g_hash_table_insert(params, key, strdup(value));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create device parameter table from XML
+ *
+ * \param[in] name Device name (used for logging only)
+ * \param[in] dev XML containing device parameters
+ */
+static GHashTable *
+xml2device_params(const char *name, const xmlNode *dev)
+{
+ GHashTable *params = xml2list(dev);
+ const char *value;
+
+ /* Action should never be specified in the device configuration,
+ * but we support it for users who are familiar with other software
+ * that worked that way.
+ */
+ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
+ if (value != NULL) {
+ crm_warn("%s has '%s' parameter, which should never be specified in configuration",
+ name, STONITH_ATTR_ACTION_OP);
+
+ if (*value == '\0') {
+ crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
+
+ } else if (strcmp(value, "reboot") == 0) {
+ crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
+ STONITH_ATTR_ACTION_OP);
+
+ } else if (strcmp(value, "off") == 0) {
+ map_action(params, "reboot", value);
+
+ } else {
+ map_action(params, "off", value);
+ map_action(params, "reboot", value);
+ }
+
+ g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
+ }
+
+ return params;
+}
+
+static const char *
+target_list_type(stonith_device_t * dev)
+{
+ const char *check_type = NULL;
+
+ check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
+
+ if (check_type == NULL) {
+
+ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
+ check_type = "static-list";
+ } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
+ check_type = "static-list";
+ } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
+ check_type = "dynamic-list";
+ } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
+ check_type = "status";
+ } else {
+ check_type = PCMK__VALUE_NONE;
+ }
+ }
+
+ return check_type;
+}
+
+static stonith_device_t *
+build_device_from_xml(xmlNode *dev)
+{
+ const char *value;
+ stonith_device_t *device = NULL;
+ char *agent = crm_element_value_copy(dev, "agent");
+
+ CRM_CHECK(agent != NULL, return device);
+
+ device = calloc(1, sizeof(stonith_device_t));
+
+ CRM_CHECK(device != NULL, {free(agent); return device;});
+
+ device->id = crm_element_value_copy(dev, XML_ATTR_ID);
+ device->agent = agent;
+ device->namespace = crm_element_value_copy(dev, "namespace");
+ device->params = xml2device_params(device->id, dev);
+
+ value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
+ if (value) {
+ device->targets = stonith__parse_targets(value);
+ }
+
+ value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
+ device->aliases = build_port_aliases(value, &(device->targets));
+
+ value = target_list_type(device);
+ if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) {
+ /* Other than "static-list", dev-> targets is unnecessary. */
+ g_list_free_full(device->targets, free);
+ device->targets = NULL;
+ }
+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
+ case pcmk_rc_ok:
+ if (device->agent_metadata) {
+ read_action_metadata(device);
+ stonith__device_parameter_flags(&(device->flags), device->id,
+ device->agent_metadata);
+ }
+ break;
+
+ case EAGAIN:
+ if (device->timer == NULL) {
+ device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
+ TRUE, get_agent_metadata_cb, device);
+ }
+ if (!mainloop_timer_running(device->timer)) {
+ mainloop_timer_start(device->timer);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ value = g_hash_table_lookup(device->params, "nodeid");
+ if (!value) {
+ device->include_nodeid = is_nodeid_required(device->agent_metadata);
+ }
+
+ value = crm_element_value(dev, "rsc_provides");
+ if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
+ device->automatic_unfencing = TRUE;
+ }
+
+ if (is_action_required("on", device)) {
+ crm_info("Fencing device '%s' requires unfencing", device->id);
+ }
+
+ if (device->on_target_actions != NULL) {
+ crm_info("Fencing device '%s' requires actions (%s) to be executed "
+ "on target", device->id,
+ (const char *) device->on_target_actions->str);
+ }
+
+ device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
+ /* TODO: Hook up priority */
+
+ return device;
+}
+
+static void
+schedule_internal_command(const char *origin,
+ stonith_device_t * device,
+ const char *action,
+ const char *target,
+ int timeout,
+ void *internal_user_data,
+ void (*done_cb) (int pid,
+ const pcmk__action_result_t *result,
+ void *user_data))
+{
+ async_command_t *cmd = NULL;
+
+ cmd = calloc(1, sizeof(async_command_t));
+ CRM_ASSERT(cmd != NULL);
+
+ cmd->id = -1;
+ cmd->default_timeout = timeout ? timeout : 60;
+ cmd->timeout = cmd->default_timeout;
+ cmd->action = strdup(action);
+ pcmk__str_update(&cmd->target, target);
+ cmd->device = strdup(device->id);
+ cmd->origin = strdup(origin);
+ cmd->client = strdup(crm_system_name);
+ cmd->client_name = strdup(crm_system_name);
+
+ cmd->internal_user_data = internal_user_data;
+ cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
+
+ schedule_stonith_command(cmd, device);
+}
+
+// Fence agent status commands use custom exit status codes
+enum fence_status_code {
+ fence_status_invalid = -1,
+ fence_status_active = 0,
+ fence_status_unknown = 1,
+ fence_status_inactive = 2,
+};
+
+static void
+status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
+{
+ async_command_t *cmd = user_data;
+ struct device_search_s *search = cmd->internal_user_data;
+ stonith_device_t *dev = cmd_device(cmd);
+ gboolean can = FALSE;
+
+ free_async_command(cmd);
+
+ if (!dev) {
+ search_devices_record_result(search, NULL, FALSE);
+ return;
+ }
+
+ mainloop_set_trigger(dev->work);
+
+ if (result->execution_status != PCMK_EXEC_DONE) {
+ crm_warn("Assuming %s cannot fence %s "
+ "because status could not be executed: %s%s%s%s",
+ dev->id, search->host,
+ pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ search_devices_record_result(search, dev->id, FALSE);
+ return;
+ }
+
+ switch (result->exit_status) {
+ case fence_status_unknown:
+ crm_trace("%s reported it cannot fence %s", dev->id, search->host);
+ break;
+
+ case fence_status_active:
+ case fence_status_inactive:
+ crm_trace("%s reported it can fence %s", dev->id, search->host);
+ can = TRUE;
+ break;
+
+ default:
+ crm_warn("Assuming %s cannot fence %s "
+ "(status returned unknown code %d)",
+ dev->id, search->host, result->exit_status);
+ break;
+ }
+ search_devices_record_result(search, dev->id, can);
+}
+
+static void
+dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
+ void *user_data)
+{
+ async_command_t *cmd = user_data;
+ struct device_search_s *search = cmd->internal_user_data;
+ stonith_device_t *dev = cmd_device(cmd);
+ gboolean can_fence = FALSE;
+
+ free_async_command(cmd);
+
+ /* Host/alias must be in the list output to be eligible to be fenced
+ *
+ * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
+ * if the guest is still listed despite being moved to another machine
+ */
+ if (!dev) {
+ search_devices_record_result(search, NULL, FALSE);
+ return;
+ }
+
+ mainloop_set_trigger(dev->work);
+
+ if (pcmk__result_ok(result)) {
+ crm_info("Refreshing target list for %s", dev->id);
+ g_list_free_full(dev->targets, free);
+ dev->targets = stonith__parse_targets(result->action_stdout);
+ dev->targets_age = time(NULL);
+
+ } else if (dev->targets != NULL) {
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ crm_info("Reusing most recent target list for %s "
+ "because list returned error code %d",
+ dev->id, result->exit_status);
+ } else {
+ crm_info("Reusing most recent target list for %s "
+ "because list could not be executed: %s%s%s%s",
+ dev->id, pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ }
+
+ } else { // We have never successfully executed list
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ crm_warn("Assuming %s cannot fence %s "
+ "because list returned error code %d",
+ dev->id, search->host, result->exit_status);
+ } else {
+ crm_warn("Assuming %s cannot fence %s "
+ "because list could not be executed: %s%s%s%s",
+ dev->id, search->host,
+ pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ }
+
+ /* Fall back to pcmk_host_check="status" if the user didn't explicitly
+ * specify "dynamic-list".
+ */
+ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
+ crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
+ g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK),
+ strdup("status"));
+ }
+ }
+
+ if (dev->targets) {
+ const char *alias = g_hash_table_lookup(dev->aliases, search->host);
+
+ if (!alias) {
+ alias = search->host;
+ }
+ if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
+ can_fence = TRUE;
+ }
+ }
+ search_devices_record_result(search, dev->id, can_fence);
+}
+
+/*!
+ * \internal
+ * \brief Check whether two device parameter tables differ
+ *
+ * \param[in] first   Device parameters to compare against \p second
+ * \param[in] second  Device parameters to compare with \p first
+ *
+ * \return 1 if any key in \p first is missing from \p second or has a
+ *         different value there, otherwise 0
+ */
+static int
+device_params_diff(GHashTable *first, GHashTable *second)
+{
+ char *key = NULL;
+ char *value = NULL;
+ GHashTableIter gIter;
+
+ g_hash_table_iter_init(&gIter, first);
+ while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
+
+        if (strstr(key, "CRM_meta") == key) {
+            continue;
+        } else if (strcmp(key, "crm_feature_set") == 0) {
+ continue;
+ } else {
+ char *other_value = g_hash_table_lookup(second, key);
+
+ if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
+ crm_trace("Different value for %s: %s != %s", key, other_value, value);
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
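+
+/* Note that device_params_diff() checks in one direction only; callers that
+ * need a symmetric comparison must call it twice with the arguments swapped,
+ * as device_has_duplicate() does below.
+ */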
+
+/*!
+ * \internal
+ * \brief Check whether an identical device is already in the device list
+ *
+ * \param[in] device  Device to check for
+ *
+ * \return Matching entry from device_list if found, otherwise NULL
+ */
+static stonith_device_t *
+device_has_duplicate(const stonith_device_t *device)
+{
+ stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
+
+ if (!dup) {
+ crm_trace("No match for %s", device->id);
+ return NULL;
+
+ } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
+ crm_trace("Different agent: %s != %s", dup->agent, device->agent);
+ return NULL;
+ }
+
+    /* @TODO Use calculate_operation_digest() here? */
+ if (device_params_diff(device->params, dup->params) ||
+ device_params_diff(dup->params, device->params)) {
+ return NULL;
+ }
+
+ crm_trace("Match");
+ return dup;
+}
+
+int
+stonith_device_register(xmlNode *dev, gboolean from_cib)
+{
+ stonith_device_t *dup = NULL;
+ stonith_device_t *device = build_device_from_xml(dev);
+ guint ndevices = 0;
+ int rv = pcmk_ok;
+
+ CRM_CHECK(device != NULL, return -ENOMEM);
+
+    /* Is this a watchdog fence device? */
+ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
+ pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
+ if (stonith_watchdog_timeout_ms <= 0) {
+ crm_err("Ignoring watchdog fence device without "
+ "stonith-watchdog-timeout set.");
+ rv = -ENODEV;
+ /* fall through to cleanup & return */
+ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
+ crm_err("Ignoring watchdog fence device with unknown "
+ "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
+ device->agent?device->agent:"");
+ rv = -ENODEV;
+ /* fall through to cleanup & return */
+ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
+ pcmk__str_none)) {
+ crm_err("Ignoring watchdog fence device "
+ "named %s !='"STONITH_WATCHDOG_ID"'.",
+ device->id?device->id:"");
+ rv = -ENODEV;
+ /* fall through to cleanup & return */
+ } else {
+ if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
+ pcmk__str_none)) {
+                /* This has either an empty list or the targets configured
+                 * for watchdog fencing
+                 */
+ g_list_free_full(stonith_watchdog_targets, free);
+ stonith_watchdog_targets = device->targets;
+ device->targets = NULL;
+ }
+ if (node_does_watchdog_fencing(stonith_our_uname)) {
+ g_list_free_full(device->targets, free);
+ device->targets = stonith__parse_targets(stonith_our_uname);
+ g_hash_table_replace(device->params,
+ strdup(PCMK_STONITH_HOST_LIST),
+ strdup(stonith_our_uname));
+ /* proceed as with any other stonith-device */
+ break;
+ }
+
+ crm_debug("Skip registration of watchdog fence device on node not in host-list.");
+ /* cleanup and fall through to more cleanup and return */
+ device->targets = NULL;
+ stonith_device_remove(device->id, from_cib);
+ }
+ free_device(device);
+ return rv;
+ } while (0);
+
+ dup = device_has_duplicate(device);
+ if (dup) {
+ ndevices = g_hash_table_size(device_list);
+ crm_debug("Device '%s' already in device list (%d active device%s)",
+ device->id, ndevices, pcmk__plural_s(ndevices));
+        free_device(device);
+        device = dup; // device_has_duplicate() returned the device_list entry
+        device->dirty = FALSE;
+
+ } else {
+ stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
+
+ if (from_cib && old && old->api_registered) {
+            /* If the CIB is writing over an entry that is shared with a
+             * stonith client, copy any pending ops that currently exist on
+             * the old entry to the new one. Otherwise the pending ops would
+             * be reported as failures.
+             */
+ crm_info("Overwriting existing entry for %s from CIB", device->id);
+ device->pending_ops = old->pending_ops;
+ device->api_registered = TRUE;
+ old->pending_ops = NULL;
+ if (device->pending_ops) {
+ mainloop_set_trigger(device->work);
+ }
+ }
+ g_hash_table_replace(device_list, device->id, device);
+
+ ndevices = g_hash_table_size(device_list);
+ crm_notice("Added '%s' to device list (%d active device%s)",
+ device->id, ndevices, pcmk__plural_s(ndevices));
+ }
+
+ if (from_cib) {
+ device->cib_registered = TRUE;
+ } else {
+ device->api_registered = TRUE;
+ }
+
+ return pcmk_ok;
+}
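+
+/* A device may be registered from both the CIB and the API at once; the
+ * cib_registered and api_registered flags are tracked independently, and
+ * stonith_device_remove() below drops the device only after both have been
+ * cleared.
+ */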
+
+void
+stonith_device_remove(const char *id, bool from_cib)
+{
+ stonith_device_t *device = g_hash_table_lookup(device_list, id);
+ guint ndevices = 0;
+
+ if (!device) {
+ ndevices = g_hash_table_size(device_list);
+ crm_info("Device '%s' not found (%d active device%s)",
+ id, ndevices, pcmk__plural_s(ndevices));
+ return;
+ }
+
+ if (from_cib) {
+ device->cib_registered = FALSE;
+ } else {
+ device->verified = FALSE;
+ device->api_registered = FALSE;
+ }
+
+ if (!device->cib_registered && !device->api_registered) {
+ g_hash_table_remove(device_list, id);
+ ndevices = g_hash_table_size(device_list);
+ crm_info("Removed '%s' from device list (%d active device%s)",
+ id, ndevices, pcmk__plural_s(ndevices));
+ } else {
+ crm_trace("Not removing '%s' from device list (%d active) because "
+ "still registered via:%s%s",
+ id, g_hash_table_size(device_list),
+ (device->cib_registered? " cib" : ""),
+ (device->api_registered? " api" : ""));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return the number of stonith levels registered for a node
+ *
+ * \param[in] tp Node's topology table entry
+ *
+ * \return Number of non-NULL levels in topology entry
+ * \note This function is used only for log messages.
+ */
+static int
+count_active_levels(const stonith_topology_t *tp)
+{
+ int lpc = 0;
+ int count = 0;
+
+ for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
+ if (tp->levels[lpc] != NULL) {
+ count++;
+ }
+ }
+ return count;
+}
+
+static void
+free_topology_entry(gpointer data)
+{
+ stonith_topology_t *tp = data;
+
+ int lpc = 0;
+
+ for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
+ if (tp->levels[lpc] != NULL) {
+ g_list_free_full(tp->levels[lpc], free);
+ }
+ }
+ free(tp->target);
+ free(tp->target_value);
+ free(tp->target_pattern);
+ free(tp->target_attribute);
+ free(tp);
+}
+
+void
+free_topology_list(void)
+{
+ if (topology != NULL) {
+ g_hash_table_destroy(topology);
+ topology = NULL;
+ }
+}
+
+void
+init_topology_list(void)
+{
+ if (topology == NULL) {
+ topology = pcmk__strkey_table(NULL, free_topology_entry);
+ }
+}
+
+char *
+stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
+{
+ if (mode == fenced_target_by_unknown) {
+ mode = unpack_level_kind(level);
+ }
+ switch (mode) {
+ case fenced_target_by_name:
+ return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
+
+ case fenced_target_by_pattern:
+ return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
+
+ case fenced_target_by_attribute:
+ return crm_strdup_printf("%s=%s",
+ crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE),
+ crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE));
+
+ default:
+ return crm_strdup_printf("unknown-%s", ID(level));
+ }
+}
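+
+/* For example, a level identified by node name yields the name itself, a
+ * level identified by attribute yields a key of the form "name=value", and
+ * an unidentifiable level yields "unknown-<id>".
+ */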
+
+/*!
+ * \internal
+ * \brief Parse target identification from topology level XML
+ *
+ * \param[in] level Topology level XML to parse
+ *
+ * \return How to identify target of \p level
+ */
+static enum fenced_target_by
+unpack_level_kind(const xmlNode *level)
+{
+ if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) {
+ return fenced_target_by_name;
+ }
+ if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) {
+ return fenced_target_by_pattern;
+ }
+ if (!stand_alone /* if standalone, there's no attribute manager */
+ && (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL)
+ && (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) {
+ return fenced_target_by_attribute;
+ }
+ return fenced_target_by_unknown;
+}
+
+static stonith_key_value_t *
+parse_device_list(const char *devices)
+{
+ int lpc = 0;
+ int max = 0;
+ int last = 0;
+ stonith_key_value_t *output = NULL;
+
+ if (devices == NULL) {
+ return output;
+ }
+
+ max = strlen(devices);
+ for (lpc = 0; lpc <= max; lpc++) {
+ if (devices[lpc] == ',' || devices[lpc] == 0) {
+ char *line = strndup(devices + last, lpc - last);
+
+ output = stonith_key_value_add(output, NULL, line);
+ free(line);
+
+ last = lpc + 1;
+ }
+ }
+
+ return output;
+}
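+
+/* A minimal usage sketch: parsing "d1,d2" yields a two-entry list whose
+ * values are "d1" and "d2":
+ *
+ *     stonith_key_value_t *devices = parse_device_list("d1,d2");
+ *
+ *     for (stonith_key_value_t *d = devices; d != NULL; d = d->next) {
+ *         crm_trace("Parsed device '%s'", d->value);
+ *     }
+ *     stonith_key_value_freeall(devices, 1, 1);
+ */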
+
+/*!
+ * \internal
+ * \brief Unpack essential information from topology request XML
+ *
+ * \param[in] xml Request XML to search
+ * \param[out] mode If not NULL, where to store level kind
+ * \param[out] target If not NULL, where to store representation of target
+ * \param[out] id If not NULL, where to store level number
+ * \param[out] desc If not NULL, where to store log-friendly level description
+ *
+ * \return Topology level XML from within \p xml, or NULL if not found
+ * \note The caller is responsible for freeing \p *target and \p *desc if set.
+ */
+static xmlNode *
+unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
+ int *id, char **desc)
+{
+ enum fenced_target_by local_mode = fenced_target_by_unknown;
+ char *local_target = NULL;
+ int local_id = 0;
+
+ /* The level element can be the top element or lower. If top level, don't
+ * search by xpath, because it might give multiple hits if the XML is the
+ * CIB.
+ */
+ if ((xml != NULL)
+ && !pcmk__str_eq(TYPE(xml), XML_TAG_FENCING_LEVEL, pcmk__str_none)) {
+ xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING);
+ }
+
+ if (xml == NULL) {
+ if (desc != NULL) {
+ *desc = crm_strdup_printf("missing");
+ }
+ } else {
+ local_mode = unpack_level_kind(xml);
+ local_target = stonith_level_key(xml, local_mode);
+ crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &local_id);
+ if (desc != NULL) {
+ *desc = crm_strdup_printf("%s[%d]", local_target, local_id);
+ }
+ }
+
+ if (mode != NULL) {
+ *mode = local_mode;
+ }
+ if (id != NULL) {
+ *id = local_id;
+ }
+
+ if (target != NULL) {
+ *target = local_target;
+ } else {
+ free(local_target);
+ }
+
+ return xml;
+}
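+
+/* For example, given a level element targeting "node1" at index 2, *desc is
+ * set to "node1[2]"; if no level element can be found, *desc is set to
+ * "missing".
+ */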
+
+/*!
+ * \internal
+ * \brief Register a fencing topology level for a target
+ *
+ * Given an XML request specifying the target name, level index, and device IDs
+ * for the level, this will create an entry for the target in the global topology
+ * table if one does not already exist, then append the specified device IDs to
+ * the entry's device list for the specified level.
+ *
+ * \param[in] msg XML request for STONITH level registration
+ * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
+ * \param[out] result Where to set result of registration
+ */
+void
+fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+{
+ int id = 0;
+ xmlNode *level;
+ enum fenced_target_by mode;
+ char *target;
+
+ stonith_topology_t *tp;
+ stonith_key_value_t *dIter = NULL;
+ stonith_key_value_t *devices = NULL;
+
+ CRM_CHECK((msg != NULL) && (result != NULL), return);
+
+ level = unpack_level_request(msg, &mode, &target, &id, desc);
+ if (level == NULL) {
+ fenced_set_protocol_error(result);
+ return;
+ }
+
+ // Ensure an ID was given (even the client API adds an ID)
+ if (pcmk__str_empty(ID(level))) {
+ crm_warn("Ignoring registration for topology level without ID");
+ free(target);
+ crm_log_xml_trace(level, "Bad level");
+ pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+ "Topology level is invalid without ID");
+ return;
+ }
+
+ // Ensure a valid target was specified
+ if (mode == fenced_target_by_unknown) {
+ crm_warn("Ignoring registration for topology level '%s' "
+ "without valid target", ID(level));
+ free(target);
+ crm_log_xml_trace(level, "Bad level");
+ pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+ "Invalid target for topology level '%s'",
+ ID(level));
+ return;
+ }
+
+ // Ensure level ID is in allowed range
+ if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
+ crm_warn("Ignoring topology registration for %s with invalid level %d",
+ target, id);
+ free(target);
+ crm_log_xml_trace(level, "Bad level");
+ pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+ "Invalid level number '%s' for topology level '%s'",
+ pcmk__s(crm_element_value(level,
+ XML_ATTR_STONITH_INDEX),
+ ""),
+ ID(level));
+ return;
+ }
+
+ /* Find or create topology table entry */
+ tp = g_hash_table_lookup(topology, target);
+ if (tp == NULL) {
+ tp = calloc(1, sizeof(stonith_topology_t));
+ if (tp == NULL) {
+ pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ strerror(ENOMEM));
+ free(target);
+ return;
+ }
+ tp->kind = mode;
+ tp->target = target;
+ tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
+ tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
+ tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
+
+ g_hash_table_replace(topology, tp->target, tp);
+ crm_trace("Added %s (%d) to the topology (%d active entries)",
+ target, (int) mode, g_hash_table_size(topology));
+ } else {
+ free(target);
+ }
+
+ if (tp->levels[id] != NULL) {
+ crm_info("Adding to the existing %s[%d] topology entry",
+ tp->target, id);
+ }
+
+ devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
+ for (dIter = devices; dIter; dIter = dIter->next) {
+ const char *device = dIter->value;
+
+ crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
+ tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
+ }
+ stonith_key_value_freeall(devices, 1, 1);
+
+ {
+ int nlevels = count_active_levels(tp);
+
+ crm_info("Target %s has %d active fencing level%s",
+ tp->target, nlevels, pcmk__plural_s(nlevels));
+ }
+
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Unregister a fencing topology level for a target
+ *
+ * Given an XML request specifying the target name and level index (or 0 for all
+ * levels), this will remove any corresponding entry for the target from the
+ * global topology table.
+ *
+ * \param[in] msg XML request for STONITH level unregistration
+ * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
+ * \param[out] result Where to set result of unregistration
+ */
+void
+fenced_unregister_level(xmlNode *msg, char **desc,
+ pcmk__action_result_t *result)
+{
+ int id = -1;
+ stonith_topology_t *tp;
+ char *target;
+ xmlNode *level = NULL;
+
+ CRM_CHECK(result != NULL, return);
+
+ level = unpack_level_request(msg, NULL, &target, &id, desc);
+ if (level == NULL) {
+ fenced_set_protocol_error(result);
+ return;
+ }
+
+ // Ensure level ID is in allowed range
+ if ((id < 0) || (id >= ST_LEVEL_MAX)) {
+ crm_warn("Ignoring topology unregistration for %s with invalid level %d",
+ target, id);
+ free(target);
+ crm_log_xml_trace(level, "Bad level");
+ pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+ "Invalid level number '%s' for topology level %s",
+ pcmk__s(crm_element_value(level,
+ XML_ATTR_STONITH_INDEX),
+ "<null>"),
+
+ // Client API doesn't add ID to unregistration XML
+ pcmk__s(ID(level), ""));
+ return;
+ }
+
+ tp = g_hash_table_lookup(topology, target);
+ if (tp == NULL) {
+ guint nentries = g_hash_table_size(topology);
+
+ crm_info("No fencing topology found for %s (%d active %s)",
+ target, nentries,
+ pcmk__plural_alt(nentries, "entry", "entries"));
+
+ } else if (id == 0 && g_hash_table_remove(topology, target)) {
+ guint nentries = g_hash_table_size(topology);
+
+ crm_info("Removed all fencing topology entries related to %s "
+ "(%d active %s remaining)", target, nentries,
+ pcmk__plural_alt(nentries, "entry", "entries"));
+
+ } else if (tp->levels[id] != NULL) {
+ guint nlevels;
+
+ g_list_free_full(tp->levels[id], free);
+ tp->levels[id] = NULL;
+
+ nlevels = count_active_levels(tp);
+ crm_info("Removed level %d from fencing topology for %s "
+ "(%d active level%s remaining)",
+ id, target, nlevels, pcmk__plural_s(nlevels));
+ }
+
+ free(target);
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+}
+
+static char *
+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
+{
+    guint max = g_list_length(list);
+    size_t delim_len = (delim == NULL)? 0 : strlen(delim);
+    size_t num_delims = (max == 0)? 0 : (max - 1 + (terminate_with_delim? 1 : 0));
+    size_t alloc_size = 1 + (num_delims * delim_len);
+ char *rv;
+ GList *gIter;
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ const char *value = (const char *) gIter->data;
+
+ alloc_size += strlen(value);
+ }
+ rv = calloc(alloc_size, sizeof(char));
+ if (rv) {
+ char *pos = rv;
+ const char *lead_delim = "";
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ const char *value = (const char *) gIter->data;
+
+ pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
+ lead_delim = delim;
+ }
+ if (max && terminate_with_delim) {
+ sprintf(pos, "%s", delim);
+ }
+ }
+ return rv;
+}
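+
+/* A minimal usage sketch (mirroring how the watchdog "list" result is built
+ * in execute_agent_action() below):
+ *
+ *     GList *targets = NULL;
+ *
+ *     targets = g_list_append(targets, strdup("node1"));
+ *     targets = g_list_append(targets, strdup("node2"));
+ *
+ *     char *s = list_to_string(targets, "\n", TRUE); // "node1\nnode2\n"
+ *
+ *     free(s);
+ *     g_list_free_full(targets, free);
+ */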
+
+/*!
+ * \internal
+ * \brief Execute a fence agent action directly (and asynchronously)
+ *
+ * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
+ * directly on a specified device. Only list, monitor, and status actions are
+ * expected to use this call, though it should work with any agent command.
+ *
+ * \param[in] msg Request XML specifying action
+ * \param[out] result Where to store result of action
+ *
+ * \note If the action is monitor, the device must be registered via the API
+ * (CIB registration is not sufficient), because monitor should not be
+ * possible unless the device is "started" (API registered).
+ */
+static void
+execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
+{
+ xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
+ xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
+ const char *id = crm_element_value(dev, F_STONITH_DEVICE);
+ const char *action = crm_element_value(op, F_STONITH_ACTION);
+ async_command_t *cmd = NULL;
+ stonith_device_t *device = NULL;
+
+ if ((id == NULL) || (action == NULL)) {
+ crm_info("Malformed API action request: device %s, action %s",
+ (id? id : "not specified"),
+ (action? action : "not specified"));
+ fenced_set_protocol_error(result);
+ return;
+ }
+
+ if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
+ // Watchdog agent actions are implemented internally
+ if (stonith_watchdog_timeout_ms <= 0) {
+ pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "Watchdog fence device not configured");
+ return;
+
+ } else if (pcmk__str_eq(action, "list", pcmk__str_none)) {
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_result_output(result,
+ list_to_string(stonith_watchdog_targets,
+ "\n", TRUE),
+ NULL);
+ return;
+
+ } else if (pcmk__str_eq(action, "monitor", pcmk__str_none)) {
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return;
+ }
+ }
+
+ device = g_hash_table_lookup(device_list, id);
+ if (device == NULL) {
+ crm_info("Ignoring API '%s' action request because device %s not found",
+ action, id);
+ pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "'%s' not found", id);
+ return;
+
+    } else if (!device->api_registered
+               && pcmk__str_eq(action, "monitor", pcmk__str_none)) {
+ // Monitors may run only on "started" (API-registered) devices
+ crm_info("Ignoring API '%s' action request because device %s not active",
+ action, id);
+ pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "'%s' not active", id);
+ return;
+ }
+
+ cmd = create_async_command(msg);
+ if (cmd == NULL) {
+ crm_log_xml_warn(msg, "invalid");
+ fenced_set_protocol_error(result);
+ return;
+ }
+
+ schedule_stonith_command(cmd, device);
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+}
+
+static void
+search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
+{
+ search->replies_received++;
+ if (can_fence && device) {
+ if (search->support_action_only != st_device_supports_none) {
+ stonith_device_t *dev = g_hash_table_lookup(device_list, device);
+ if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
+ return;
+ }
+ }
+ search->capable = g_list_append(search->capable, strdup(device));
+ }
+
+ if (search->replies_needed == search->replies_received) {
+
+ guint ndevices = g_list_length(search->capable);
+
+ crm_debug("Search found %d device%s that can perform '%s' targeting %s",
+ ndevices, pcmk__plural_s(ndevices),
+ (search->action? search->action : "unknown action"),
+ (search->host? search->host : "any node"));
+
+ search->callback(search->capable, search->user_data);
+ free(search->host);
+ free(search->action);
+ free(search);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether the local host is allowed to execute a fencing action
+ *
+ * \param[in] device Fence device to check
+ * \param[in] action Fence action to check
+ * \param[in] target Hostname of fence target
+ * \param[in] allow_suicide Whether self-fencing is allowed for this operation
+ *
+ * \return TRUE if local host is allowed to execute action, FALSE otherwise
+ */
+static gboolean
+localhost_is_eligible(const stonith_device_t *device, const char *action,
+ const char *target, gboolean allow_suicide)
+{
+ gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
+ pcmk__str_casei);
+
+ if ((device != NULL) && (action != NULL)
+ && (device->on_target_actions != NULL)
+ && (strstr((const char*) device->on_target_actions->str,
+ action) != NULL)) {
+
+ if (!localhost_is_target) {
+ crm_trace("Operation '%s' using %s can only be executed for local "
+ "host, not %s", action, device->id, target);
+ return FALSE;
+ }
+
+ } else if (localhost_is_target && !allow_suicide) {
+ crm_trace("'%s' operation does not support self-fencing", action);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Check if local node is allowed to execute (possibly remapped) action
+ *
+ * \param[in] device Fence device to check
+ * \param[in] action Fence action to check
+ * \param[in] target Node name of fence target
+ * \param[in] allow_self Whether self-fencing is allowed for this operation
+ *
+ * \return true if local node is allowed to execute \p action or any actions it
+ * might be remapped to, otherwise false
+ */
+static bool
+localhost_is_eligible_with_remap(const stonith_device_t *device,
+ const char *action, const char *target,
+ gboolean allow_self)
+{
+ // Check exact action
+ if (localhost_is_eligible(device, action, target, allow_self)) {
+ return true;
+ }
+
+ // Check potential remaps
+
+ if (pcmk__str_eq(action, "reboot", pcmk__str_none)) {
+ /* "reboot" might get remapped to "off" then "on", so even if reboot is
+ * disallowed, return true if either of those is allowed. We'll report
+ * the disallowed actions with the results. We never allow self-fencing
+ * for remapped "on" actions because the target is off at that point.
+ */
+ if (localhost_is_eligible(device, "off", target, allow_self)
+ || localhost_is_eligible(device, "on", target, FALSE)) {
+ return true;
+ }
+ }
+
+ return false;
+}
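+
+/* For example, if a device advertises "reboot" as an on-target action and
+ * the target is some other node, the exact "reboot" check fails, but the
+ * remapped "off" check may still succeed, so the local node remains
+ * eligible.
+ */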
+
+static void
+can_fence_host_with_device(stonith_device_t *dev,
+ struct device_search_s *search)
+{
+ gboolean can = FALSE;
+ const char *check_type = "Internal bug";
+ const char *target = NULL;
+ const char *alias = NULL;
+ const char *dev_id = "Unspecified device";
+ const char *action = (search == NULL)? NULL : search->action;
+
+ CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
+
+ if (dev->id != NULL) {
+ dev_id = dev->id;
+ }
+
+ target = search->host;
+ if (target == NULL) {
+ can = TRUE;
+ check_type = "No target";
+ goto search_report_results;
+ }
+
+ /* Answer immediately if the device does not support the action
+ * or the local node is not allowed to perform it
+ */
+ if (pcmk__str_eq(action, "on", pcmk__str_none)
+ && !pcmk_is_set(dev->flags, st_device_supports_on)) {
+ check_type = "Agent does not support 'on'";
+ goto search_report_results;
+
+ } else if (!localhost_is_eligible_with_remap(dev, action, target,
+ search->allow_suicide)) {
+ check_type = "This node is not allowed to execute action";
+ goto search_report_results;
+ }
+
+ // Check eligibility as specified by pcmk_host_check
+ check_type = target_list_type(dev);
+ alias = g_hash_table_lookup(dev->aliases, target);
+ if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) {
+ can = TRUE;
+
+ } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
+ if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
+ can = TRUE;
+        } else if ((g_hash_table_lookup(dev->params,
+                                        PCMK_STONITH_HOST_MAP) != NULL)
+                   && (alias != NULL)) {
+ can = TRUE;
+ }
+
+ } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
+ time_t now = time(NULL);
+
+ if (dev->targets == NULL || dev->targets_age + 60 < now) {
+ int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout);
+
+ if (device_timeout > search->per_device_timeout) {
+ crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur",
+ device_timeout, dev_id, search->per_device_timeout);
+ }
+
+ crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
+ check_type, dev_id, target, action);
+
+ schedule_internal_command(__func__, dev, "list", NULL,
+ search->per_device_timeout, search, dynamic_list_search_cb);
+
+            // We will respond to this search asynchronously in the callback
+ return;
+ }
+
+ if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
+ pcmk__str_casei)) {
+ can = TRUE;
+ }
+
+ } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
+ int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
+
+ if (device_timeout > search->per_device_timeout) {
+ crm_notice("Since the pcmk_status_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur",
+ device_timeout, dev_id, search->per_device_timeout);
+ }
+
+ crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
+ check_type, dev_id, target, action);
+ schedule_internal_command(__func__, dev, "status", target,
+ search->per_device_timeout, search, status_search_cb);
+        // We will respond to this search asynchronously in the callback
+ return;
+ } else {
+ crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
+ check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
+ }
+
+ search_report_results:
+ crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
+ dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
+ pcmk__s(target, "unspecified target"),
+ (alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
+ (alias == NULL)? "" : "')", check_type);
+ search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
+}
+
+static void
+search_devices(gpointer key, gpointer value, gpointer user_data)
+{
+ stonith_device_t *dev = value;
+ struct device_search_s *search = user_data;
+
+ can_fence_host_with_device(dev, search);
+}
+
+#define DEFAULT_QUERY_TIMEOUT 20
+static void
+get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
+ void (*callback) (GList * devices, void *user_data), uint32_t support_action_only)
+{
+ struct device_search_s *search;
+ guint ndevices = g_hash_table_size(device_list);
+
+ if (ndevices == 0) {
+ callback(NULL, user_data);
+ return;
+ }
+
+ search = calloc(1, sizeof(struct device_search_s));
+ if (!search) {
+ crm_crit("Cannot search for capable fence devices: %s",
+ strerror(ENOMEM));
+ callback(NULL, user_data);
+ return;
+ }
+
+ pcmk__str_update(&search->host, host);
+ pcmk__str_update(&search->action, action);
+ search->per_device_timeout = timeout;
+ search->allow_suicide = suicide;
+ search->callback = callback;
+ search->user_data = user_data;
+ search->support_action_only = support_action_only;
+
+ /* We are guaranteed this many replies, even if a device is
+ * unregistered while the search is in progress.
+ */
+ search->replies_needed = ndevices;
+
+ crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
+ ndevices, pcmk__plural_s(ndevices),
+ (search->action? search->action : "unknown action"),
+ (search->host? search->host : "any node"));
+ g_hash_table_foreach(device_list, search_devices, search);
+}
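+
+/* Note the contract here: the supplied callback is invoked exactly once,
+ * either immediately (no registered devices, or allocation failure) or from
+ * search_devices_record_result() once all per-device replies are in.
+ */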
+
+struct st_query_data {
+ xmlNode *reply;
+ char *remote_peer;
+ char *client_id;
+ char *target;
+ char *action;
+ int call_options;
+};
+
+/*!
+ * \internal
+ * \brief Add action-specific attributes to query reply XML
+ *
+ * \param[in,out] xml XML to add attributes to
+ * \param[in] action Fence action
+ * \param[in] device Fence device
+ * \param[in] target Fence target
+ */
+static void
+add_action_specific_attributes(xmlNode *xml, const char *action,
+ const stonith_device_t *device,
+ const char *target)
+{
+ int action_specific_timeout;
+ int delay_max;
+ int delay_base;
+
+ CRM_CHECK(xml && action && device, return);
+
+ if (is_action_required(action, device)) {
+ crm_trace("Action '%s' is required using %s", action, device->id);
+ crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
+ }
+
+ action_specific_timeout = get_action_timeout(device, action, 0);
+ if (action_specific_timeout) {
+ crm_trace("Action '%s' has timeout %dms using %s",
+ action, action_specific_timeout, device->id);
+ crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
+ }
+
+ delay_max = get_action_delay_max(device, action);
+ if (delay_max > 0) {
+ crm_trace("Action '%s' has maximum random delay %ds using %s",
+ action, delay_max, device->id);
+ crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max);
+ }
+
+ delay_base = get_action_delay_base(device, action, target);
+ if (delay_base > 0) {
+ crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base);
+ }
+
+ if ((delay_max > 0) && (delay_base == 0)) {
+ crm_trace("Action '%s' has maximum random delay %ds using %s",
+ action, delay_max, device->id);
+ } else if ((delay_max == 0) && (delay_base > 0)) {
+ crm_trace("Action '%s' has a static delay of %ds using %s",
+ action, delay_base, device->id);
+ } else if ((delay_max > 0) && (delay_base > 0)) {
+ crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
+ "maximum delay of %ds using %s",
+ action, delay_base, delay_max, device->id);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add "disallowed" attribute to query reply XML if appropriate
+ *
+ * \param[in,out] xml XML to add attribute to
+ * \param[in] action Fence action
+ * \param[in] device Fence device
+ * \param[in] target Fence target
+ * \param[in] allow_suicide Whether self-fencing is allowed
+ */
+static void
+add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
+ const char *target, gboolean allow_suicide)
+{
+ if (!localhost_is_eligible(device, action, target, allow_suicide)) {
+ crm_trace("Action '%s' using %s is disallowed for local host",
+ action, device->id);
+ pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add child element with action-specific values to query reply XML
+ *
+ * \param[in,out] xml XML to add attribute to
+ * \param[in] action Fence action
+ * \param[in] device Fence device
+ * \param[in] target Fence target
+ * \param[in] allow_suicide Whether self-fencing is allowed
+ */
+static void
+add_action_reply(xmlNode *xml, const char *action,
+ const stonith_device_t *device, const char *target,
+ gboolean allow_suicide)
+{
+ xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
+
+ crm_xml_add(child, XML_ATTR_ID, action);
+ add_action_specific_attributes(child, action, device, target);
+ add_disallowed(child, action, device, target, allow_suicide);
+}
+
+static void
+stonith_query_capable_device_cb(GList * devices, void *user_data)
+{
+ struct st_query_data *query = user_data;
+ int available_devices = 0;
+ xmlNode *dev = NULL;
+ xmlNode *list = NULL;
+ GList *lpc = NULL;
+ pcmk__client_t *client = NULL;
+
+ if (query->client_id != NULL) {
+ client = pcmk__find_client_by_id(query->client_id);
+ if ((client == NULL) && (query->remote_peer == NULL)) {
+ crm_trace("Skipping reply to %s: no longer a client",
+ query->client_id);
+ goto done;
+ }
+ }
+
+ /* Pack the results into XML */
+ list = create_xml_node(NULL, __func__);
+ crm_xml_add(list, F_STONITH_TARGET, query->target);
+ for (lpc = devices; lpc != NULL; lpc = lpc->next) {
+ stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
+ const char *action = query->action;
+
+ if (!device) {
+ /* It is possible the device got unregistered while
+ * determining who can fence the target */
+ continue;
+ }
+
+ available_devices++;
+
+ dev = create_xml_node(list, F_STONITH_DEVICE);
+ crm_xml_add(dev, XML_ATTR_ID, device->id);
+ crm_xml_add(dev, "namespace", device->namespace);
+ crm_xml_add(dev, "agent", device->agent);
+ crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
+ crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags);
+
+ /* If the originating fencer wants to reboot the node, and we have a
+ * capable device that doesn't support "reboot", remap to "off" instead.
+ */
+ if (!pcmk_is_set(device->flags, st_device_supports_reboot)
+ && pcmk__str_eq(query->action, "reboot", pcmk__str_none)) {
+ crm_trace("%s doesn't support reboot, using values for off instead",
+ device->id);
+ action = "off";
+ }
+
+ /* Add action-specific values if available */
+ add_action_specific_attributes(dev, action, device, query->target);
+ if (pcmk__str_eq(query->action, "reboot", pcmk__str_none)) {
+ /* A "reboot" *might* get remapped to "off" then "on", so after
+ * sending the "reboot"-specific values in the main element, we add
+ * sub-elements for "off" and "on" values.
+ *
+ * We short-circuited earlier if "reboot", "off" and "on" are all
+ * disallowed for the local host. However if only one or two are
+ * disallowed, we send back the results and mark which ones are
+ * disallowed. If "reboot" is disallowed, this might cause problems
+ * with older fencer versions, which won't check for it. Older
+ * versions will ignore "off" and "on", so they are not a problem.
+ */
+ add_disallowed(dev, action, device, query->target,
+ pcmk_is_set(query->call_options, st_opt_allow_suicide));
+ add_action_reply(dev, "off", device, query->target,
+ pcmk_is_set(query->call_options, st_opt_allow_suicide));
+ add_action_reply(dev, "on", device, query->target, FALSE);
+ }
+
+ /* A query without a target wants device parameters */
+ if (query->target == NULL) {
+ xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
+
+ g_hash_table_foreach(device->params, hash2field, attrs);
+ }
+ }
+
+ crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
+ if (query->target) {
+ crm_debug("Found %d matching device%s for target '%s'",
+ available_devices, pcmk__plural_s(available_devices),
+ query->target);
+ } else {
+ crm_debug("%d device%s installed",
+ available_devices, pcmk__plural_s(available_devices));
+ }
+
+ if (list != NULL) {
+ crm_log_xml_trace(list, "Add query results");
+ add_message_xml(query->reply, F_STONITH_CALLDATA, list);
+ }
+
+ stonith_send_reply(query->reply, query->call_options, query->remote_peer,
+ client);
+
+done:
+ free_xml(query->reply);
+ free(query->remote_peer);
+ free(query->client_id);
+ free(query->target);
+ free(query->action);
+ free(query);
+ free_xml(list);
+ g_list_free_full(devices, free);
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an asynchronous command
+ *
+ * \param[in] cmd Command the result is for
+ * \param[in] result Result of command
+ * \param[in] pid Process ID of command, if available
+ * \param[in] next Alternate device that will be tried if command failed
+ * \param[in] op_merged Whether this command was merged with an earlier one
+ */
+static void
+log_async_result(const async_command_t *cmd,
+ const pcmk__action_result_t *result,
+ int pid, const char *next, bool op_merged)
+{
+ int log_level = LOG_ERR;
+ int output_log_level = LOG_NEVER;
+ guint devices_remaining = g_list_length(cmd->next_device_iter);
+
+ GString *msg = g_string_sized_new(80); // Reasonable starting size
+
+ // Choose log levels appropriately if we have a result
+ if (pcmk__result_ok(result)) {
+ log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
+ if ((result->action_stdout != NULL)
+ && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
+ output_log_level = LOG_DEBUG;
+ }
+ next = NULL;
+ } else {
+ log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
+ if ((result->action_stdout != NULL)
+ && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
+ output_log_level = LOG_WARNING;
+ }
+ }
+
+ // Build the log message piece by piece
+ pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
+ if (pid != 0) {
+ g_string_append_printf(msg, "[%d] ", pid);
+ }
+ if (cmd->target != NULL) {
+ pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
+ }
+ if (cmd->device != NULL) {
+ pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
+ }
+
+ // Add exit status or execution status as appropriate
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(msg, "returned %d", result->exit_status);
+ } else {
+ pcmk__g_strcat(msg, "could not be executed: ",
+ pcmk_exec_status_str(result->execution_status), NULL);
+ }
+
+ // Add exit reason and next device if appropriate
+ if (result->exit_reason != NULL) {
+ pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
+ }
+ if (next != NULL) {
+ pcmk__g_strcat(msg, ", retrying with ", next, NULL);
+ }
+ if (devices_remaining > 0) {
+ g_string_append_printf(msg, " (%u device%s remaining)",
+ (unsigned int) devices_remaining,
+ pcmk__plural_s(devices_remaining));
+ }
+ g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
+ (op_merged? "merged " : ""), cmd->id,
+ cmd->client_name);
+
+ // Log the result
+ do_crm_log(log_level, "%s", msg->str);
+ g_string_free(msg, TRUE);
+
+ // Log the output (which may have multiple lines), if appropriate
+ if (output_log_level != LOG_NEVER) {
+ char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
+
+ crm_log_output(output_log_level, prefix, result->action_stdout);
+ free(prefix);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Reply to requester after asynchronous command completion
+ *
+ * \param[in] cmd Command that completed
+ * \param[in] result Result of command
+ * \param[in] pid Process ID of command, if available
+ * \param[in] merged If true, command was merged with another, not executed
+ */
+static void
+send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
+ int pid, bool merged)
+{
+ xmlNode *reply = NULL;
+ pcmk__client_t *client = NULL;
+
+ CRM_CHECK((cmd != NULL) && (result != NULL), return);
+
+ log_async_result(cmd, result, pid, NULL, merged);
+
+ if (cmd->client != NULL) {
+ client = pcmk__find_client_by_id(cmd->client);
+ if ((client == NULL) && (cmd->origin == NULL)) {
+ crm_trace("Skipping reply to %s: no longer a client", cmd->client);
+ return;
+ }
+ }
+
+ reply = construct_async_reply(cmd, result);
+ if (merged) {
+ pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true);
+ }
+
+ if (!stand_alone && pcmk__is_fencing_action(cmd->action)
+ && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
+ /* The target was also the originator, so broadcast the result on its
+ * behalf (since it will be unable to).
+ */
+ crm_trace("Broadcast '%s' result for %s (target was also originator)",
+ cmd->action, cmd->target);
+ crm_xml_add(reply, F_SUBTYPE, "broadcast");
+ crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
+ send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
+ } else {
+ // Reply only to the originator
+ stonith_send_reply(reply, cmd->options, cmd->origin, client);
+ }
+
+ crm_log_xml_trace(reply, "Reply");
+ free_xml(reply);
+
+ if (stand_alone) {
+ /* Do notification with a clean data object */
+ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+
+ stonith__xe_set_result(notify_data, result);
+ crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target);
+ crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
+ crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
+ crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
+ crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
+ crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
+
+ fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ }
+}
+
+static void
+cancel_stonith_command(async_command_t * cmd)
+{
+ stonith_device_t *device = cmd_device(cmd);
+
+ if (device) {
+ crm_trace("Cancel scheduled '%s' action using %s",
+ cmd->action, device->id);
+ device->pending_ops = g_list_remove(device->pending_ops, cmd);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Cancel and reply to any duplicates of a just-completed operation
+ *
+ * Check whether any fencing operations are scheduled to do the same thing as
+ * one that just succeeded. If so, rather than performing the same operation
+ * twice, return the result of this operation for all matching pending commands.
+ *
+ * \param[in,out] cmd Fencing operation that just succeeded
+ * \param[in] result Result of \p cmd
+ * \param[in] pid If nonzero, process ID of agent invocation (for logs)
+ *
+ * \note Duplicate merging will do the right thing for either type of remapped
+ * reboot. If the executing fencer remapped an unsupported reboot to off,
+ * then cmd->action will be "reboot" and will be merged with any other
+ * reboot requests. If the originating fencer remapped a topology reboot
+ * to off then on, we will get here once with cmd->action "off" and once
+ * with "on", and they will be merged separately with similar requests.
+ */
+static void
+reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
+ int pid)
+{
+ GList *next = NULL;
+
+ for (GList *iter = cmd_list; iter != NULL; iter = next) {
+ async_command_t *cmd_other = iter->data;
+
+ next = iter->next; // We might delete this entry, so grab next now
+
+ if (cmd == cmd_other) {
+ continue;
+ }
+
+ /* A pending operation matches if:
+ * 1. The client connections are different.
+ * 2. The target is the same.
+ * 3. The fencing action is the same.
+ * 4. The device scheduled to execute the action is the same.
+ */
+ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
+ !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
+ !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
+ !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
+
+ continue;
+ }
+
+ crm_notice("Merging fencing action '%s'%s%s originating from "
+ "client %s with identical fencing request from client %s",
+ cmd_other->action,
+ (cmd_other->target == NULL)? "" : " targeting ",
+ pcmk__s(cmd_other->target, ""), cmd_other->client_name,
+ cmd->client_name);
+
+ // Stop tracking the duplicate, send its result, and cancel it
+ cmd_list = g_list_remove_link(cmd_list, iter);
+ send_async_reply(cmd_other, result, pid, true);
+ cancel_stonith_command(cmd_other);
+
+ free_async_command(cmd_other);
+ g_list_free_1(iter);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return the next required device (if any) for an operation
+ *
+ * \param[in,out] cmd Fencing operation that just succeeded
+ *
+ * \return Next device required for action if any, otherwise NULL
+ */
+static stonith_device_t *
+next_required_device(async_command_t *cmd)
+{
+ for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) {
+ stonith_device_t *next_device = g_hash_table_lookup(device_list,
+ iter->data);
+
+ if (is_action_required(cmd->action, next_device)) {
+ /* This is only called for successful actions, so it's OK to skip
+ * non-required devices.
+ */
+ cmd->next_device_iter = iter->next;
+ return next_device;
+ }
+ }
+ return NULL;
+}
+
+static void
+st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
+{
+ async_command_t *cmd = user_data;
+
+ stonith_device_t *device = NULL;
+ stonith_device_t *next_device = NULL;
+
+ CRM_CHECK(cmd != NULL, return);
+
+ device = cmd_device(cmd);
+ cmd->active_on = NULL;
+
+ /* The device is ready to do something else now */
+ if (device) {
+ if (!device->verified && pcmk__result_ok(result) &&
+ (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
+
+ device->verified = TRUE;
+ }
+
+ mainloop_set_trigger(device->work);
+ }
+
+ if (pcmk__result_ok(result)) {
+ next_device = next_required_device(cmd);
+
+ } else if ((cmd->next_device_iter != NULL)
+ && !is_action_required(cmd->action, device)) {
+        /* If this device didn't work out, see if there are any others we can
+         * try. If the failed device was required, we can't pick another one.
+         */
+ next_device = g_hash_table_lookup(device_list,
+ cmd->next_device_iter->data);
+ cmd->next_device_iter = cmd->next_device_iter->next;
+ }
+
+ if (next_device == NULL) {
+ send_async_reply(cmd, result, pid, false);
+ if (pcmk__result_ok(result)) {
+ reply_to_duplicates(cmd, result, pid);
+ }
+ free_async_command(cmd);
+
+ } else { // This operation requires more fencing
+ log_async_result(cmd, result, pid, next_device->id, false);
+ schedule_stonith_command(cmd, next_device);
+ }
+}
+
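+/* Comparator for g_list_sort(): sorts in descending priority order, so the
+ * highest-priority device ends up at the head of the list (and is tried
+ * first by stonith_fence_get_devices_cb() below).
+ */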
+static gint
+sort_device_priority(gconstpointer a, gconstpointer b)
+{
+ const stonith_device_t *dev_a = a;
+ const stonith_device_t *dev_b = b;
+
+ if (dev_a->priority > dev_b->priority) {
+ return -1;
+ } else if (dev_a->priority < dev_b->priority) {
+ return 1;
+ }
+ return 0;
+}
+
+static void
+stonith_fence_get_devices_cb(GList * devices, void *user_data)
+{
+ async_command_t *cmd = user_data;
+ stonith_device_t *device = NULL;
+ guint ndevices = g_list_length(devices);
+
+ crm_info("Found %d matching device%s for target '%s'",
+ ndevices, pcmk__plural_s(ndevices), cmd->target);
+
+ if (devices != NULL) {
+ /* Order based on priority */
+ devices = g_list_sort(devices, sort_device_priority);
+ device = g_hash_table_lookup(device_list, devices->data);
+ }
+
+ if (device == NULL) { // No device found
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "No device configured for target '%s'",
+ cmd->target);
+ send_async_reply(cmd, &result, 0, false);
+ pcmk__reset_result(&result);
+ free_async_command(cmd);
+ g_list_free_full(devices, free);
+
+ } else { // Device found, schedule it for fencing
+ cmd->device_list = devices;
+ cmd->next_device_iter = devices->next;
+ schedule_stonith_command(cmd, device);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Execute a fence action via the local node
+ *
+ * \param[in] msg Fencing request
+ * \param[out] result Where to store result of fence action
+ */
+static void
+fence_locally(xmlNode *msg, pcmk__action_result_t *result)
+{
+ const char *device_id = NULL;
+ stonith_device_t *device = NULL;
+ async_command_t *cmd = NULL;
+ xmlNode *dev = NULL;
+
+ CRM_CHECK((msg != NULL) && (result != NULL), return);
+
+ dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
+
+ cmd = create_async_command(msg);
+ if (cmd == NULL) {
+ crm_log_xml_warn(msg, "invalid");
+ fenced_set_protocol_error(result);
+ return;
+ }
+
+ device_id = crm_element_value(dev, F_STONITH_DEVICE);
+ if (device_id != NULL) {
+ device = g_hash_table_lookup(device_list, device_id);
+ if (device == NULL) {
+ crm_err("Requested device '%s' is not available", device_id);
+ pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "Requested device '%s' not found", device_id);
+ return;
+ }
+ schedule_stonith_command(cmd, device);
+
+ } else {
+ const char *host = crm_element_value(dev, F_STONITH_TARGET);
+
+ if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
+ int nodeid = 0;
+ crm_node_t *node = NULL;
+
+ pcmk__scan_min_int(host, &nodeid, 0);
+ node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+ if (node != NULL) {
+ host = node->uname;
+ }
+ }
+
+ /* If we get to here, then self-fencing is implicitly allowed */
+ get_capable_devices(host, cmd->action, cmd->default_timeout,
+ TRUE, cmd, stonith_fence_get_devices_cb,
+ fenced_support_flag(cmd->action));
+ }
+
+ pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Build an XML reply for a fencing operation
+ *
+ * \param[in] request Request that reply is for
+ * \param[in] data If not NULL, add to reply as call data
+ * \param[in] result Full result of fencing operation
+ *
+ * \return Newly created XML reply
+ * \note The caller is responsible for freeing the result.
+ * \note This has some overlap with construct_async_reply(), but that copies
+ * values from an async_command_t, whereas this one copies them from the
+ * request.
+ */
+xmlNode *
+fenced_construct_reply(const xmlNode *request, xmlNode *data,
+ const pcmk__action_result_t *result)
+{
+ xmlNode *reply = NULL;
+
+ reply = create_xml_node(NULL, T_STONITH_REPLY);
+
+ crm_xml_add(reply, "st_origin", __func__);
+ crm_xml_add(reply, F_TYPE, T_STONITH_NG);
+ stonith__xe_set_result(reply, result);
+
+ if (request == NULL) {
+ /* Most likely, this is the result of a stonith operation that was
+ * initiated before we came up. Unfortunately that means we lack enough
+ * information to provide clients with a full result.
+ *
+ * @TODO Maybe synchronize this information at start-up?
+ */
+ crm_warn("Missing request information for client notifications for "
+ "operation with result '%s' (initiated before we came up?)",
+ pcmk_exec_status_str(result->execution_status));
+
+ } else {
+ const char *name = NULL;
+ const char *value = NULL;
+
+ // Attributes to copy from request to reply
+ const char *names[] = {
+ F_STONITH_OPERATION,
+ F_STONITH_CALLID,
+ F_STONITH_CLIENTID,
+ F_STONITH_CLIENTNAME,
+ F_STONITH_REMOTE_OP_ID,
+ F_STONITH_CALLOPTS
+ };
+
+ for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
+ name = names[lpc];
+ value = crm_element_value(request, name);
+ crm_xml_add(reply, name, value);
+ }
+ if (data != NULL) {
+ add_message_xml(reply, F_STONITH_CALLDATA, data);
+ }
+ }
+ return reply;
+}
+
+/*!
+ * \internal
+ * \brief Build an XML reply to an asynchronous fencing command
+ *
+ * \param[in] cmd Fencing command that reply is for
+ * \param[in] result Command result
+ */
+static xmlNode *
+construct_async_reply(const async_command_t *cmd,
+ const pcmk__action_result_t *result)
+{
+ xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);
+
+ crm_xml_add(reply, "st_origin", __func__);
+ crm_xml_add(reply, F_TYPE, T_STONITH_NG);
+ crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
+ crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
+ crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
+ crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
+ crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
+ crm_xml_add(reply, F_STONITH_TARGET, cmd->target);
+ crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
+ crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
+ crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
+ crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
+
+ stonith__xe_set_result(reply, result);
+ return reply;
+}
+
+bool
+fencing_peer_active(crm_node_t *peer)
+{
+ if (peer == NULL) {
+ return FALSE;
+ } else if (peer->uname == NULL) {
+ return FALSE;
+ } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+void
+set_fencing_completed(remote_fencing_op_t *op)
+{
+ struct timespec tv;
+
+ qb_util_timespec_from_epoch_get(&tv);
+ op->completed = tv.tv_sec;
+ op->completed_nsec = tv.tv_nsec;
+}
+
+/*!
+ * \internal
+ * \brief Look for alternate node needed if local node shouldn't fence target
+ *
+ * \param[in] target Node that must be fenced
+ *
+ * \return Name of an alternate node that should fence \p target if any,
+ * or NULL otherwise
+ */
+static const char *
+check_alternate_host(const char *target)
+{
+ if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
+ GHashTableIter gIter;
+ crm_node_t *entry = NULL;
+
+ g_hash_table_iter_init(&gIter, crm_peer_cache);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+ if (fencing_peer_active(entry)
+ && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
+ crm_notice("Forwarding self-fencing request to %s",
+ entry->uname);
+ return entry->uname;
+ }
+ }
+ crm_warn("Will handle own fencing because no peer can");
+ }
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Send a reply to a CPG peer or IPC client
+ *
+ * \param[in] reply XML reply to send
+ * \param[in] call_options Send synchronously if st_opt_sync_call is set
+ * \param[in] remote_peer If not NULL, name of peer node to send CPG reply
+ * \param[in,out] client If not NULL, client to send IPC reply
+ */
+static void
+stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer,
+ pcmk__client_t *client)
+{
+ CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
+ return);
+
+ if (remote_peer == NULL) {
+ do_local_reply(reply, client, call_options);
+ } else {
+ send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
+ reply, FALSE);
+ }
+}
+
+static void
+remove_relay_op(xmlNode * request)
+{
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
+ const char *relay_op_id = NULL;
+ const char *op_id = NULL;
+ const char *client_name = NULL;
+ const char *target = NULL;
+ remote_fencing_op_t *relay_op = NULL;
+
+ if (dev) {
+ target = crm_element_value(dev, F_STONITH_TARGET);
+ }
+
+ relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
+ op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
+ client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
+
+ /* Delete RELAY operation. */
+ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
+ relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
+
+ if (relay_op) {
+ GHashTableIter iter;
+ remote_fencing_op_t *list_op = NULL;
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+
+ /* If the operation to be deleted is registered as a duplicate, delete the registration. */
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
+ GList *dup_iter = NULL;
+ if (list_op != relay_op) {
+ for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
+ remote_fencing_op_t *other = dup_iter->data;
+ if (other == relay_op) {
+ other->duplicates = g_list_remove(other->duplicates, relay_op);
+ break;
+ }
+ }
+ }
+ }
+ crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
+ "replaced by op %s ('%s'%s%s for %s)",
+ relay_op->id, relay_op->action,
+ (relay_op->target == NULL)? "" : " targeting ",
+ pcmk__s(relay_op->target, ""),
+ relay_op->client_name, op_id, relay_op->action,
+ (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
+ client_name);
+
+ g_hash_table_remove(stonith_remote_op_list, relay_op_id);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an API request was sent by a privileged user
+ *
+ * API commands related to fencing configuration may be done only by privileged
+ * IPC users (i.e. root or hacluster), because all other users should go through
+ * the CIB to have ACLs applied. If no client was given, this is a peer request,
+ * which is always allowed.
+ *
+ * \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
+ * \param[in] op Requested API operation (for logging only)
+ *
+ * \return true if sender is peer or privileged client, otherwise false
+ */
+static inline bool
+is_privileged(const pcmk__client_t *c, const char *op)
+{
+ if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
+ return true;
+ } else {
+ crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
+ pcmk__s(op, ""), pcmk__client_name(c));
+ return false;
+ }
+}
+
+// CRM_OP_REGISTER
+static xmlNode *
+handle_register_request(pcmk__request_t *request)
+{
+ xmlNode *reply = create_xml_node(NULL, "reply");
+
+ CRM_ASSERT(request->ipc_client != NULL);
+ crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
+ crm_xml_add(reply, F_STONITH_CLIENTID, request->ipc_client->id);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_request_flags(request, pcmk__request_reuse_options);
+ return reply;
+}
+
+// STONITH_OP_EXEC
+static xmlNode *
+handle_agent_request(pcmk__request_t *request)
+{
+ execute_agent_action(request->xml, &request->result);
+ if (request->result.execution_status == PCMK_EXEC_PENDING) {
+ return NULL;
+ }
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// STONITH_OP_TIMEOUT_UPDATE
+static xmlNode *
+handle_update_timeout_request(pcmk__request_t *request)
+{
+ const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID);
+ const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
+ int op_timeout = 0;
+
+ crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &op_timeout);
+ do_stonith_async_timeout_update(client_id, call_id, op_timeout);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+// STONITH_OP_QUERY
+static xmlNode *
+handle_query_request(pcmk__request_t *request)
+{
+ int timeout = 0;
+ xmlNode *dev = NULL;
+ const char *action = NULL;
+ const char *target = NULL;
+ const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
+ struct st_query_data *query = NULL;
+
+ if (request->peer != NULL) {
+        // Record it for future notification
+ create_remote_stonith_op(client_id, request->xml, TRUE);
+ }
+
+ /* Delete the DC node RELAY operation. */
+ remove_relay_op(request->xml);
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+ dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER);
+ if (dev != NULL) {
+ const char *device = crm_element_value(dev, F_STONITH_DEVICE);
+
+ if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
+ return NULL; // No query or reply necessary
+ }
+ target = crm_element_value(dev, F_STONITH_TARGET);
+ action = crm_element_value(dev, F_STONITH_ACTION);
+ }
+
+ crm_log_xml_trace(request->xml, "Query");
+
+ query = calloc(1, sizeof(struct st_query_data));
+ CRM_ASSERT(query != NULL);
+
+ query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
+ pcmk__str_update(&query->remote_peer, request->peer);
+ pcmk__str_update(&query->client_id, client_id);
+ pcmk__str_update(&query->target, target);
+ pcmk__str_update(&query->action, action);
+ query->call_options = request->call_options;
+
+ crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout);
+ get_capable_devices(target, action, timeout,
+ pcmk_is_set(query->call_options, st_opt_allow_suicide),
+ query, stonith_query_capable_device_cb, st_device_supports_none);
+ return NULL;
+}
+
+// T_STONITH_NOTIFY
+static xmlNode *
+handle_notify_request(pcmk__request_t *request)
+{
+ const char *flag_name = NULL;
+
+ CRM_ASSERT(request->ipc_client != NULL);
+ flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE);
+ if (flag_name != NULL) {
+ crm_debug("Enabling %s callbacks for client %s",
+ flag_name, pcmk__request_origin(request));
+ pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
+ }
+
+ flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE);
+ if (flag_name != NULL) {
+ crm_debug("Disabling %s callbacks for client %s",
+ flag_name, pcmk__request_origin(request));
+ pcmk__clear_client_flags(request->ipc_client,
+ get_stonith_flag(flag_name));
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_request_flags(request, pcmk__request_reuse_options);
+
+ return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK);
+}
+
+// STONITH_OP_RELAY
+static xmlNode *
+handle_relay_request(pcmk__request_t *request)
+{
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
+ LOG_TRACE);
+
+ crm_notice("Received forwarded fencing request from "
+ "%s %s to fence (%s) peer %s",
+ pcmk__request_origin_type(request),
+ pcmk__request_origin(request),
+ crm_element_value(dev, F_STONITH_ACTION),
+ crm_element_value(dev, F_STONITH_TARGET));
+
+ if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
+ fenced_set_protocol_error(&request->result);
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+ return NULL;
+}
+
+// STONITH_OP_FENCE
+static xmlNode *
+handle_fence_request(pcmk__request_t *request)
+{
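+    /* A request from a peer (or in standalone mode) is executed locally; a
+     * manual confirmation is acknowledged directly; any other request from a
+     * local client is initiated as a remote operation, possibly relayed to
+     * an alternate node when the local node is the fencing target.
+     */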
+ if ((request->peer != NULL) || stand_alone) {
+ fence_locally(request->xml, &request->result);
+
+ } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
+ switch (fenced_handle_manual_confirmation(request->ipc_client,
+ request->xml)) {
+ case pcmk_rc_ok:
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
+ NULL);
+ break;
+ case EINPROGRESS:
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
+ NULL);
+ break;
+ default:
+ fenced_set_protocol_error(&request->result);
+ break;
+ }
+
+ } else {
+ const char *alternate_host = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
+ LOG_TRACE);
+ const char *target = crm_element_value(dev, F_STONITH_TARGET);
+ const char *action = crm_element_value(dev, F_STONITH_ACTION);
+ const char *device = crm_element_value(dev, F_STONITH_DEVICE);
+
+ if (request->ipc_client != NULL) {
+ int tolerance = 0;
+
+ crm_notice("Client %s wants to fence (%s) %s using %s",
+ pcmk__request_origin(request), action,
+ target, (device? device : "any device"));
+ crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
+ if (stonith_check_fence_tolerance(tolerance, target, action)) {
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
+ NULL);
+ return fenced_construct_reply(request->xml, NULL,
+ &request->result);
+ }
+ alternate_host = check_alternate_host(target);
+
+ } else {
+ crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
+ request->peer, action, target,
+ (device == NULL)? "(any)" : device);
+ }
+
+ if (alternate_host != NULL) {
+ const char *client_id = NULL;
+ remote_fencing_op_t *op = NULL;
+
+ if (request->ipc_client->id == 0) {
+ client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
+ } else {
+ client_id = request->ipc_client->id;
+ }
+
+ /* Create a duplicate fencing operation to relay with the client ID.
+ * When a query response is received, this operation should be
+ * deleted to avoid keeping the duplicate around.
+ */
+ op = create_remote_stonith_op(client_id, request->xml, FALSE);
+
+ crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY);
+ crm_xml_add(request->xml, F_STONITH_CLIENTID,
+ request->ipc_client->id);
+ crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id);
+ send_cluster_message(crm_get_peer(0, alternate_host),
+ crm_msg_stonith_ng, request->xml, FALSE);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
+ NULL);
+
+ } else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
+ FALSE) == NULL) {
+ fenced_set_protocol_error(&request->result);
+
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
+ NULL);
+ }
+ }
+
+ if (request->result.execution_status == PCMK_EXEC_PENDING) {
+ return NULL;
+ }
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// STONITH_OP_FENCE_HISTORY
+static xmlNode *
+handle_history_request(pcmk__request_t *request)
+{
+ xmlNode *reply = NULL;
+ xmlNode *data = NULL;
+
+ stonith_fence_history(request->xml, &data, request->peer,
+ request->call_options);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) {
+ /* When the local node broadcasts its history, it sets
+ * st_opt_discard_reply and doesn't need a reply.
+ */
+ reply = fenced_construct_reply(request->xml, data, &request->result);
+ }
+ free_xml(data);
+ return reply;
+}
+
+// STONITH_OP_DEVICE_ADD
+static xmlNode *
+handle_device_add_request(pcmk__request_t *request)
+{
+ const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
+ xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
+ LOG_ERR);
+
+ if (is_privileged(request->ipc_client, op)) {
+ int rc = stonith_device_register(dev, FALSE);
+
+ pcmk__set_result(&request->result,
+ ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+ stonith__legacy2status(rc),
+ ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
+ PCMK_EXEC_INVALID,
+ "Unprivileged users must register device via CIB");
+ }
+ fenced_send_device_notification(op, &request->result,
+ (dev == NULL)? NULL : ID(dev));
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// STONITH_OP_DEVICE_DEL
+static xmlNode *
+handle_device_delete_request(pcmk__request_t *request)
+{
+ xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
+ LOG_ERR);
+ const char *device_id = crm_element_value(dev, XML_ATTR_ID);
+ const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
+
+ if (is_privileged(request->ipc_client, op)) {
+ stonith_device_remove(device_id, false);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
+ PCMK_EXEC_INVALID,
+ "Unprivileged users must delete device via CIB");
+ }
+ fenced_send_device_notification(op, &request->result, device_id);
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// STONITH_OP_LEVEL_ADD
+static xmlNode *
+handle_level_add_request(pcmk__request_t *request)
+{
+ char *desc = NULL;
+ const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
+
+ if (is_privileged(request->ipc_client, op)) {
+ fenced_register_level(request->xml, &desc, &request->result);
+ } else {
+ unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
+ pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
+ PCMK_EXEC_INVALID,
+ "Unprivileged users must add level via CIB");
+ }
+ fenced_send_level_notification(op, &request->result, desc);
+ free(desc);
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// STONITH_OP_LEVEL_DEL
+static xmlNode *
+handle_level_delete_request(pcmk__request_t *request)
+{
+ char *desc = NULL;
+ const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
+
+ if (is_privileged(request->ipc_client, op)) {
+ fenced_unregister_level(request->xml, &desc, &request->result);
+ } else {
+ unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
+ pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
+ PCMK_EXEC_INVALID,
+ "Unprivileged users must delete level via CIB");
+ }
+ fenced_send_level_notification(op, &request->result, desc);
+ free(desc);
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+// CRM_OP_RM_NODE_CACHE
+static xmlNode *
+handle_cache_request(pcmk__request_t *request)
+{
+ int node_id = 0;
+ const char *name = NULL;
+
+ crm_element_value_int(request->xml, XML_ATTR_ID, &node_id);
+ name = crm_element_value(request->xml, XML_ATTR_UNAME);
+ reap_crm_member(node_id, name);
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+ crm_err("Unknown IPC request %s from %s %s",
+ request->op, pcmk__request_origin_type(request),
+ pcmk__request_origin(request));
+ pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Unknown IPC request type '%s' (bug?)", request->op);
+ return fenced_construct_reply(request->xml, NULL, &request->result);
+}
+
+static void
+fenced_register_handlers(void)
+{
+ pcmk__server_command_t handlers[] = {
+ { CRM_OP_REGISTER, handle_register_request },
+ { STONITH_OP_EXEC, handle_agent_request },
+ { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
+ { STONITH_OP_QUERY, handle_query_request },
+ { T_STONITH_NOTIFY, handle_notify_request },
+ { STONITH_OP_RELAY, handle_relay_request },
+ { STONITH_OP_FENCE, handle_fence_request },
+ { STONITH_OP_FENCE_HISTORY, handle_history_request },
+ { STONITH_OP_DEVICE_ADD, handle_device_add_request },
+ { STONITH_OP_DEVICE_DEL, handle_device_delete_request },
+ { STONITH_OP_LEVEL_ADD, handle_level_add_request },
+ { STONITH_OP_LEVEL_DEL, handle_level_delete_request },
+ { CRM_OP_RM_NODE_CACHE, handle_cache_request },
+ { NULL, handle_unknown_request },
+ };
+
+ fenced_handlers = pcmk__register_handlers(handlers);
+}
+
+void
+fenced_unregister_handlers(void)
+{
+ if (fenced_handlers != NULL) {
+ g_hash_table_destroy(fenced_handlers);
+ fenced_handlers = NULL;
+ }
+}
+
+static void
+handle_request(pcmk__request_t *request)
+{
+ xmlNode *reply = NULL;
+ const char *reason = NULL;
+
+ if (fenced_handlers == NULL) {
+ fenced_register_handlers();
+ }
+ reply = pcmk__process_request(request, fenced_handlers);
+ if (reply != NULL) {
+ if (pcmk_is_set(request->flags, pcmk__request_reuse_options)
+ && (request->ipc_client != NULL)) {
+ /* Certain IPC-only commands must reuse the call options from the
+ * original request rather than the ones set by stonith_send_reply()
+ * -> do_local_reply().
+ */
+ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
+ request->ipc_flags);
+ request->ipc_client->request_id = 0;
+ } else {
+ stonith_send_reply(reply, request->call_options,
+ request->peer, request->ipc_client);
+ }
+ free_xml(reply);
+ }
+
+ reason = request->result.exit_reason;
+ crm_debug("Processed %s request from %s %s: %s%s%s%s",
+ request->op, pcmk__request_origin_type(request),
+ pcmk__request_origin(request),
+ pcmk_exec_status_str(request->result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+}
+
+static void
+handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
+{
+ // Copy, because request might be freed before we want to log this
+ char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
+
+ if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
+ process_remote_stonith_query(request);
+ } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
+ fenced_process_fencing_reply(request);
+ } else {
+ crm_err("Ignoring unknown %s reply from %s %s",
+ pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
+ ((client == NULL)? remote_peer : pcmk__client_name(client)));
+ crm_log_xml_warn(request, "UnknownOp");
+ free(op);
+ return;
+ }
+ crm_debug("Processed %s reply from %s %s",
+ op, ((client == NULL)? "peer" : "client"),
+ ((client == NULL)? remote_peer : pcmk__client_name(client)));
+ free(op);
+}
+
+/*!
+ * \internal
+ * \brief Handle a message from an IPC client or CPG peer
+ *
+ * \param[in,out] client If not NULL, IPC client that sent message
+ * \param[in] id If from IPC client, IPC message ID
+ * \param[in] flags Message flags
+ * \param[in,out] message Message XML
+ * \param[in] remote_peer If not NULL, CPG peer that sent message
+ */
+void
+stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ xmlNode *message, const char *remote_peer)
+{
+ int call_options = st_opt_none;
+ bool is_reply = false;
+
+ CRM_CHECK(message != NULL, return);
+
+ if (get_xpath_object("//" T_STONITH_REPLY, message, LOG_NEVER) != NULL) {
+ is_reply = true;
+ }
+ crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
+ crm_debug("Processing %ssynchronous %s %s %u from %s %s",
+ pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
+ crm_element_value(message, F_STONITH_OPERATION),
+ (is_reply? "reply" : "request"), id,
+ ((client == NULL)? "peer" : "client"),
+ ((client == NULL)? remote_peer : pcmk__client_name(client)));
+
+ if (pcmk_is_set(call_options, st_opt_sync_call)) {
+ CRM_ASSERT(client == NULL || client->request_id == id);
+ }
+
+ if (is_reply) {
+ handle_reply(client, message, remote_peer);
+ } else {
+ pcmk__request_t request = {
+ .ipc_client = client,
+ .ipc_id = id,
+ .ipc_flags = flags,
+ .peer = remote_peer,
+ .xml = message,
+ .call_options = call_options,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
+
+ request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION);
+ CRM_CHECK(request.op != NULL, return);
+
+ if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
+ pcmk__set_request_flags(&request, pcmk__request_sync);
+ }
+
+ handle_request(&request);
+ pcmk__reset_request(&request);
+ }
+}
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
new file mode 100644
index 0000000..a766477
--- /dev/null
+++ b/daemons/fenced/fenced_history.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright 2009-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <pacemaker-fenced.h>
+
+#define MAX_STONITH_HISTORY 500
+
+/*!
+ * \internal
+ * \brief Send a broadcast to all nodes to trigger cleanup or
+ * history synchronisation
+ *
+ * \param[in] history Optional history to be attached
+ * \param[in] callopts Call options (cleanup is requested via the
+ *                     st_opt_cleanup flag)
+ * \param[in] target   If not NULL, limit cleanup to this fence target
+ */
+static void
+stonith_send_broadcast_history(xmlNode *history,
+ int callopts,
+ const char *target)
+{
+ xmlNode *bcast = create_xml_node(NULL, "stonith_command");
+ xmlNode *data = create_xml_node(NULL, __func__);
+
+ if (target) {
+ crm_xml_add(data, F_STONITH_TARGET, target);
+ }
+ crm_xml_add(bcast, F_TYPE, T_STONITH_NG);
+ crm_xml_add(bcast, F_SUBTYPE, "broadcast");
+ crm_xml_add(bcast, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
+ crm_xml_add_int(bcast, F_STONITH_CALLOPTS, callopts);
+ if (history) {
+ add_node_copy(data, history);
+ }
+ add_message_xml(bcast, F_STONITH_CALLDATA, data);
+ send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
+
+ free_xml(data);
+ free_xml(bcast);
+}
+
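+/*!
+ * \internal
+ * \brief g_hash_table_foreach_remove() callback for history cleanup
+ *
+ * \return TRUE if the entry is a completed (done or failed) operation
+ *         matching the target filter (if any), otherwise FALSE
+ */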
+static gboolean
+stonith_remove_history_entry(gpointer key, gpointer value, gpointer user_data)
+{
+ remote_fencing_op_t *op = value;
+ const char *target = (const char *) user_data;
+
+ if ((op->state == st_failed) || (op->state == st_done)) {
+ if ((target) && (strcmp(op->target, target) != 0)) {
+ return FALSE;
+ }
+ return TRUE;
+ }
+
+ return FALSE; /* don't clean pending operations */
+}
+
+/*!
+ * \internal
+ * \brief Broadcast a cleanup request, or clean up the local history
+ *
+ * \param[in] target     If not NULL, limit cleanup to this fence target
+ * \param[in] broadcast  If TRUE, send a cleanup broadcast to all peers
+ */
+static void
+stonith_fence_history_cleanup(const char *target,
+ gboolean broadcast)
+{
+ if (broadcast) {
+ stonith_send_broadcast_history(NULL,
+ st_opt_cleanup | st_opt_discard_reply,
+ target);
+ /* we'll do the local clean when we receive back our own broadcast */
+ } else if (stonith_remote_op_list) {
+ g_hash_table_foreach_remove(stonith_remote_op_list,
+ stonith_remove_history_entry,
+ (gpointer) target);
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ }
+}
+
+/* Keeping the length of the fence history within bounds
+ * =====================================================
+ *
+ * If things really run wild, a large number of fencing attempts can fill
+ * up the hash table, eventually using a lot of memory and creating huge
+ * history-sync messages. Before the history was synced across nodes, at
+ * least a reboot of a cluster node kept the history within bounds, even
+ * though not in a reliable manner.
+ *
+ * stonith_remote_op_list is not sorted by time stamp, so it would be
+ * rather expensive to delete, for example, the oldest entry whenever the
+ * list grows past MAX_STONITH_HISTORY entries. It is more efficient to
+ * purge MAX_STONITH_HISTORY/2 entries whenever the list grows beyond
+ * MAX_STONITH_HISTORY (sort by age, then purge the MAX_STONITH_HISTORY/2
+ * oldest). Doing that on a per-node basis might raise the probability of
+ * large syncs occurring; alternatives such as a broadcast to purge
+ * MAX_STONITH_HISTORY/2 entries, or not syncing above a certain
+ * threshold, come to mind. The simplest approach, though, is to purge the
+ * full history throughout the cluster once MAX_STONITH_HISTORY is
+ * reached. On the other hand, that purges the history in situations where
+ * it would probably be handy to still have it.
+ */
+
+
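+/*!
+ * \internal
+ * \brief qsort() comparator for history entries: pending operations sort
+ *        first, then completed ones from most recently to least recently
+ *        completed (by seconds, then nanoseconds)
+ */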
+static int
+op_time_sort(const void *a_voidp, const void *b_voidp)
+{
+ const remote_fencing_op_t **a = (const remote_fencing_op_t **) a_voidp;
+ const remote_fencing_op_t **b = (const remote_fencing_op_t **) b_voidp;
+ gboolean a_pending = ((*a)->state != st_failed) && ((*a)->state != st_done);
+ gboolean b_pending = ((*b)->state != st_failed) && ((*b)->state != st_done);
+
+ if (a_pending && b_pending) {
+ return 0;
+ } else if (a_pending) {
+ return -1;
+ } else if (b_pending) {
+ return 1;
+ } else if ((*b)->completed == (*a)->completed) {
+ if ((*b)->completed_nsec > (*a)->completed_nsec) {
+ return 1;
+ } else if ((*b)->completed_nsec == (*a)->completed_nsec) {
+ return 0;
+ }
+ } else if ((*b)->completed > (*a)->completed) {
+ return 1;
+ }
+
+ return -1;
+}
+
+
+/*!
+ * \internal
+ * \brief Trim the local history to MAX_STONITH_HISTORY / 2 entries
+ *        once it grows beyond MAX_STONITH_HISTORY
+ */
+void
+stonith_fence_history_trim(void)
+{
+ guint num_ops;
+
+ if (!stonith_remote_op_list) {
+ return;
+ }
+ num_ops = g_hash_table_size(stonith_remote_op_list);
+ if (num_ops > MAX_STONITH_HISTORY) {
+ remote_fencing_op_t *ops[num_ops];
+ remote_fencing_op_t *op = NULL;
+ GHashTableIter iter;
+ int i;
+
+ crm_trace("Fencing History growing beyond limit of %d so purge "
+ "half of failed/successful attempts", MAX_STONITH_HISTORY);
+
+ /* write all ops into an array */
+ i = 0;
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+ ops[i++] = op;
+ }
+        /* Sort the array so that pending ops come first, followed by
+         * completed ops from most recent to oldest
+         */
+        qsort(ops, num_ops, sizeof(remote_fencing_op_t *), op_time_sort);
+        /* Purge the oldest half of the history entries */
+ for (i = MAX_STONITH_HISTORY / 2; i < num_ops; i++) {
+ /* keep pending ops even if they shouldn't fill more than
+ * half of our buffer
+ */
+ if ((ops[i]->state == st_failed) || (ops[i]->state == st_done)) {
+ g_hash_table_remove(stonith_remote_op_list, ops[i]->id);
+ }
+ }
+ /* we've just purged valid data from the list so there is no need
+ * to create a notification - if displayed it can stay
+ */
+ }
+}
+
+/*!
+ * \internal
+ * \brief Convert an XML fence history into a hash table
+ *        like stonith_remote_op_list
+ *
+ * \param[in] history  Fence history in XML
+ *
+ * \return Fence history as a hash table
+ */
+static GHashTable *
+stonith_xml_history_to_list(const xmlNode *history)
+{
+ xmlNode *xml_op = NULL;
+ GHashTable *rv = NULL;
+
+ init_stonith_remote_op_hash_table(&rv);
+
+ CRM_LOG_ASSERT(rv != NULL);
+
+ for (xml_op = pcmk__xml_first_child(history); xml_op != NULL;
+ xml_op = pcmk__xml_next(xml_op)) {
+ remote_fencing_op_t *op = NULL;
+ char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID);
+ int state;
+ int exit_status = CRM_EX_OK;
+ int execution_status = PCMK_EXEC_DONE;
+ long long completed;
+ long long completed_nsec = 0L;
+
+ if (!id) {
+ crm_warn("Malformed fencing history received from peer");
+ continue;
+ }
+
+ crm_trace("Attaching op %s to hashtable", id);
+
+        op = calloc(1, sizeof(remote_fencing_op_t));
+        CRM_ASSERT(op != NULL);
+
+ op->id = id;
+ op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET);
+ op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION);
+ op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN);
+ op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE);
+ op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME);
+ crm_element_value_ll(xml_op, F_STONITH_DATE, &completed);
+ op->completed = (time_t) completed;
+ crm_element_value_ll(xml_op, F_STONITH_DATE_NSEC, &completed_nsec);
+ op->completed_nsec = completed_nsec;
+ crm_element_value_int(xml_op, F_STONITH_STATE, &state);
+ op->state = (enum op_state) state;
+
+ /* @COMPAT We can't use stonith__xe_get_result() here because
+ * fencers <2.1.3 didn't include results, leading it to assume an error
+ * status. Instead, set an unknown status in that case.
+ */
+ if ((crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &exit_status) < 0)
+ || (crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS,
+ &execution_status) < 0)) {
+ exit_status = CRM_EX_INDETERMINATE;
+ execution_status = PCMK_EXEC_UNKNOWN;
+ }
+ pcmk__set_result(&op->result, exit_status, execution_status,
+ crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON));
+ pcmk__set_result_output(&op->result,
+ crm_element_value_copy(xml_op, F_STONITH_OUTPUT),
+ NULL);
+
+
+ g_hash_table_replace(rv, id, op);
+ CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
+ }
+
+ return rv;
+}
+
+/*!
+ * \internal
+ * \brief Craft the XML difference between the local fence history and a
+ *        history received from a peer, and merge the peer's history into
+ *        the local one
+ *
+ * \param[in,out] remote_history  Peer's fence history as a hash table
+ *                                (may be NULL)
+ * \param[in]     add_id          Whether to include each operation's ID
+ *                                (not needed when answering an API history
+ *                                request)
+ * \param[in]     target          If not NULL, limit to this fence target
+ *
+ * \return The fence-history as xml
+ */
+static xmlNode *
+stonith_local_history_diff_and_merge(GHashTable *remote_history,
+ gboolean add_id, const char *target)
+{
+ xmlNode *history = NULL;
+ GHashTableIter iter;
+ remote_fencing_op_t *op = NULL;
+ gboolean updated = FALSE;
+ int cnt = 0;
+
+ if (stonith_remote_op_list) {
+ char *id = NULL;
+
+ history = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
+
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
+ xmlNode *entry = NULL;
+
+ if (remote_history) {
+ remote_fencing_op_t *remote_op =
+ g_hash_table_lookup(remote_history, op->id);
+
+ if (remote_op) {
+ if (stonith__op_state_pending(op->state)
+ && !stonith__op_state_pending(remote_op->state)) {
+
+ crm_debug("Updating outdated pending operation %.8s "
+ "(state=%s) according to the one (state=%s) from "
+ "remote peer history",
+ op->id, stonith_op_state_str(op->state),
+ stonith_op_state_str(remote_op->state));
+
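+                        /* Replace the outdated local pending op with the
+                         * completed one from the peer: steal remote_op from
+                         * remote_history so that table won't free it, then
+                         * swap the two ID strings so remote_op owns the
+                         * string used as the hash key. iter_replace() then
+                         * frees the outdated op (and the swapped-in ID
+                         * along with it).
+                         */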
+ g_hash_table_steal(remote_history, op->id);
+ op->id = remote_op->id;
+ remote_op->id = id;
+ g_hash_table_iter_replace(&iter, remote_op);
+
+ updated = TRUE;
+ continue; /* skip outdated entries */
+
+ } else if (!stonith__op_state_pending(op->state)
+ && stonith__op_state_pending(remote_op->state)) {
+
+ crm_debug("Broadcasting operation %.8s (state=%s) to "
+ "update the outdated pending one "
+ "(state=%s) in remote peer history",
+ op->id, stonith_op_state_str(op->state),
+ stonith_op_state_str(remote_op->state));
+
+ g_hash_table_remove(remote_history, op->id);
+
+ } else {
+ g_hash_table_remove(remote_history, op->id);
+ continue; /* skip entries broadcasted already */
+ }
+ }
+ }
+
+ if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) {
+ continue;
+ }
+
+ cnt++;
+ crm_trace("Attaching op %s", op->id);
+ entry = create_xml_node(history, STONITH_OP_EXEC);
+ if (add_id) {
+ crm_xml_add(entry, F_STONITH_REMOTE_OP_ID, op->id);
+ }
+ crm_xml_add(entry, F_STONITH_TARGET, op->target);
+ crm_xml_add(entry, F_STONITH_ACTION, op->action);
+ crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
+ crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
+ crm_xml_add_ll(entry, F_STONITH_DATE, op->completed);
+ crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec);
+ crm_xml_add_int(entry, F_STONITH_STATE, op->state);
+ stonith__xe_set_result(entry, &op->result);
+ }
+ }
+
+ if (remote_history) {
+ init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+
+ updated |= g_hash_table_size(remote_history);
+
+ g_hash_table_iter_init(&iter, remote_history);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+ if (stonith__op_state_pending(op->state) &&
+ pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+
+ crm_warn("Failing pending operation %.8s originated by us but "
+ "known only from peer history", op->id);
+ op->state = st_failed;
+ set_fencing_completed(op);
+
+ /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
+ * from setting a delegate
+ */
+ pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+ "Initiated by earlier fencer "
+ "process and presumed failed");
+ fenced_broadcast_op_result(op, false);
+ }
+
+ g_hash_table_iter_steal(&iter);
+ g_hash_table_replace(stonith_remote_op_list, op->id, op);
+            /* We could trim the history here, but if we bailed out after
+             * trimming, we might miss more recent entries still in the list.
+             * Since we don't bail out, trimming once at the end is more
+             * efficient, and the memory overhead is minimal because we are
+             * just moving pointers from one hash table to another.
+             */
+ }
+
+ g_hash_table_destroy(remote_history); /* remove what is left */
+ }
+
+ if (updated) {
+ stonith_fence_history_trim();
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ }
+
+ if (cnt == 0) {
+ free_xml(history);
+ return NULL;
+ } else {
+ return history;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Craft xml from the local fence-history
+ *
+ * \param[in] add_id   Whether to include each operation's ID (not needed
+ *                     when answering an API history request)
+ * \param[in] target   If not NULL, limit to this fence target
+ *
+ * \return The fence-history as xml
+ */
+static xmlNode *
+stonith_local_history(gboolean add_id, const char *target)
+{
+ return stonith_local_history_diff_and_merge(NULL, add_id, target);
+}
+
+/*!
+ * \internal
+ * \brief Handle fence-history messages (from API or coming in as broadcasts)
+ *
+ * \param[in,out] msg Request XML
+ * \param[out] output Where to set local history, if requested
+ * \param[in] remote_peer If broadcast, peer that sent it
+ * \param[in] options Call options from the request
+ */
+void
+stonith_fence_history(xmlNode *msg, xmlNode **output,
+ const char *remote_peer, int options)
+{
+ const char *target = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER);
+ xmlNode *out_history = NULL;
+
+ if (dev) {
+ target = crm_element_value(dev, F_STONITH_TARGET);
+ if (target && (options & st_opt_cs_nodeid)) {
+ int nodeid;
+ crm_node_t *node;
+
+ pcmk__scan_min_int(target, &nodeid, 0);
+ node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+ if (node) {
+ target = node->uname;
+ }
+ }
+ }
+
+ if (options & st_opt_cleanup) {
+ crm_trace("Cleaning up operations on %s in %p", target,
+ stonith_remote_op_list);
+
+ stonith_fence_history_cleanup(target,
+ crm_element_value(msg, F_STONITH_CALLID) != NULL);
+ } else if (options & st_opt_broadcast) {
+        /* There is currently no clear signal for when a history sync is
+         * done, so send a notification for anything that looks like a
+         * history sync
+         */
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY_SYNCED, NULL, NULL);
+ if (crm_element_value(msg, F_STONITH_CALLID)) {
+            /* This request came via the stonith API, so craft a broadcast
+             * with this node's history, so that every node can merge it and
+             * broadcast what it has on top
+             */
+ out_history = stonith_local_history(TRUE, NULL);
+ crm_trace("Broadcasting history to peers");
+ stonith_send_broadcast_history(out_history,
+ st_opt_broadcast | st_opt_discard_reply,
+ NULL);
+ } else if (remote_peer &&
+ !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
+ xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST,
+ msg, LOG_NEVER);
+
+            /* This is either a broadcast created directly for a stonith API
+             * request, or a diff sent in response to such a broadcast. In
+             * both cases it may or may not carry a history.
+             *
+             * If we received differential data, merge it in and stop.
+             * Otherwise, broadcast what we have on top of it, marked as
+             * differential, and merge the received history in afterwards.
+             */
+ if (!history || !pcmk__xe_attr_is_true(history, F_STONITH_DIFFERENTIAL)) {
+ GHashTable *received_history = NULL;
+
+ if (history != NULL) {
+ received_history = stonith_xml_history_to_list(history);
+ }
+ out_history =
+ stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
+ if (out_history) {
+ crm_trace("Broadcasting history-diff to peers");
+ pcmk__xe_set_bool_attr(out_history, F_STONITH_DIFFERENTIAL, true);
+ stonith_send_broadcast_history(out_history,
+ st_opt_broadcast | st_opt_discard_reply,
+ NULL);
+ } else {
+ crm_trace("History-diff is empty - skip broadcast");
+ }
+ }
+ } else {
+ crm_trace("Skipping history-query-broadcast (%s%s)"
+ " we sent ourselves",
+ remote_peer?"remote-peer=":"local-ipc",
+ remote_peer?remote_peer:"");
+ }
+ } else {
+ /* plain history request */
+ crm_trace("Looking for operations on %s in %p", target,
+ stonith_remote_op_list);
+ *output = stonith_local_history(FALSE, target);
+ }
+ free_xml(out_history);
+}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
new file mode 100644
index 0000000..dc67947
--- /dev/null
+++ b/daemons/fenced/fenced_remote.c
@@ -0,0 +1,2509 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <regex.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <crm/common/util.h>
+#include <pacemaker-fenced.h>
+
+#define TIMEOUT_MULTIPLY_FACTOR 1.2
+
+/* When one fencer queries its peers for devices able to handle a fencing
+ * request, each peer will reply with a list of such devices available to it.
+ * Each reply will be parsed into a peer_device_info_t, with each device's
+ * information kept in a device_properties_t.
+ */
+
+typedef struct device_properties_s {
+ /* Whether access to this device has been verified */
+ gboolean verified;
+
+ /* The remaining members are indexed by the operation's "phase" */
+
+ /* Whether this device has been executed in each phase */
+ gboolean executed[st_phase_max];
+ /* Whether this device is disallowed from executing in each phase */
+ gboolean disallowed[st_phase_max];
+ /* Action-specific timeout for each phase */
+ int custom_action_timeout[st_phase_max];
+ /* Action-specific maximum random delay for each phase */
+ int delay_max[st_phase_max];
+ /* Action-specific base delay for each phase */
+ int delay_base[st_phase_max];
+ /* Group of enum st_device_flags */
+ uint32_t device_support_flags;
+} device_properties_t;
+
+typedef struct {
+ /* Name of peer that sent this result */
+ char *host;
+ /* Only try peers for non-topology based operations once */
+ gboolean tried;
+ /* Number of entries in the devices table */
+ int ndevices;
+ /* Devices available to this host that are capable of fencing the target */
+ GHashTable *devices;
+} peer_device_info_t;
+
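+// Remote fencing operations (the fence history), keyed by operation ID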
+GHashTable *stonith_remote_op_list = NULL;
+
+extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
+ int call_options);
+
+static void request_peer_fencing(remote_fencing_op_t *op,
+ peer_device_info_t *peer);
+static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
+static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
+static int get_op_total_timeout(const remote_fencing_op_t *op,
+ const peer_device_info_t *chosen_peer);
+
+static gint
+sort_strings(gconstpointer a, gconstpointer b)
+{
+ return strcmp(a, b);
+}
+
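+/*!
+ * \internal
+ * \brief Free a peer's query result (GDestroyNotify for entries in an
+ *        operation's query_results list)
+ *
+ * \param[in,out] data  peer_device_info_t to free
+ */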
+static void
+free_remote_query(gpointer data)
+{
+ if (data != NULL) {
+ peer_device_info_t *peer = data;
+
+ g_hash_table_destroy(peer->devices);
+ free(peer->host);
+ free(peer);
+ }
+}
+
+void
+free_stonith_remote_op_list(void)
+{
+ if (stonith_remote_op_list != NULL) {
+ g_hash_table_destroy(stonith_remote_op_list);
+ stonith_remote_op_list = NULL;
+ }
+}
+
+struct peer_count_data {
+ const remote_fencing_op_t *op;
+ gboolean verified_only;
+ uint32_t support_action_only;
+ int count;
+};
+
+/*!
+ * \internal
+ * \brief Increment a counter if a device has not been executed yet
+ *
+ * \param[in] key Device ID (ignored)
+ * \param[in] value Device properties
+ * \param[in,out] user_data Peer count data
+ */
+static void
+count_peer_device(gpointer key, gpointer value, gpointer user_data)
+{
+ device_properties_t *props = (device_properties_t*)value;
+ struct peer_count_data *data = user_data;
+
+ if (!props->executed[data->op->phase]
+ && (!data->verified_only || props->verified)
+        && ((data->support_action_only == st_device_supports_none)
+            || pcmk_is_set(props->device_support_flags,
+                           data->support_action_only))) {
+ ++(data->count);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check the number of available devices in a peer's query results
+ *
+ * \param[in] op Operation that results are for
+ * \param[in] peer Peer to count
+ * \param[in] verified_only Whether to count only verified devices
+ * \param[in] support_action_only  Whether to count only devices that
+ *                                 support the requested action
+ *
+ * \return Number of devices available to peer that were not already executed
+ */
+static int
+count_peer_devices(const remote_fencing_op_t *op,
+                   const peer_device_info_t *peer, gboolean verified_only,
+                   uint32_t support_action_only)
+{
+ struct peer_count_data data;
+
+ data.op = op;
+ data.verified_only = verified_only;
+    data.support_action_only = support_action_only;
+ data.count = 0;
+ if (peer) {
+ g_hash_table_foreach(peer->devices, count_peer_device, &data);
+ }
+ return data.count;
+}
+
+/*!
+ * \internal
+ * \brief Search for a device in a query result
+ *
+ * \param[in] op Operation that result is for
+ * \param[in] peer Query result for a peer
+ * \param[in] device  Device ID to search for
+ * \param[in] support_action_only  If not st_device_supports_none, consider
+ *                                 only devices with this support flag set
+ *
+ * \return Device properties if found, NULL otherwise
+ */
+static device_properties_t *
+find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
+ const char *device, uint32_t support_action_only)
+{
+ device_properties_t *props = g_hash_table_lookup(peer->devices, device);
+
+    if ((props != NULL) && (support_action_only != st_device_supports_none)
+        && !pcmk_is_set(props->device_support_flags, support_action_only)) {
+ return NULL;
+ }
+ return (props && !props->executed[op->phase]
+ && !props->disallowed[op->phase])? props : NULL;
+}
+
+/*!
+ * \internal
+ * \brief Find a device in a peer's device list and mark it as executed
+ *
+ * \param[in] op Operation that peer result is for
+ * \param[in,out] peer Peer with results to search
+ * \param[in] device ID of device to mark as done
+ * \param[in] verified_devices_only Only consider verified devices
+ *
+ * \return TRUE if device was found and marked, FALSE otherwise
+ */
+static gboolean
+grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
+ const char *device, gboolean verified_devices_only)
+{
+ device_properties_t *props = find_peer_device(op, peer, device,
+ fenced_support_flag(op->action));
+
+ if ((props == NULL) || (verified_devices_only && !props->verified)) {
+ return FALSE;
+ }
+
+ crm_trace("Removing %s from %s (%d remaining)",
+ device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
+ props->executed[op->phase] = TRUE;
+ return TRUE;
+}
+
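+/*!
+ * \internal
+ * \brief Cancel a remote fencing operation's outstanding query and
+ *        operation timers, if any
+ *
+ * \param[in,out] op  Fencer operation to update
+ */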
+static void
+clear_remote_op_timers(remote_fencing_op_t * op)
+{
+ if (op->query_timer) {
+ g_source_remove(op->query_timer);
+ op->query_timer = 0;
+ }
+ if (op->op_timer_total) {
+ g_source_remove(op->op_timer_total);
+ op->op_timer_total = 0;
+ }
+ if (op->op_timer_one) {
+ g_source_remove(op->op_timer_one);
+ op->op_timer_one = 0;
+ }
+}
+
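+/*!
+ * \internal
+ * \brief Free a remote fencing operation (GDestroyNotify for
+ *        stonith_remote_op_list values)
+ *
+ * \param[in,out] data  Operation to free
+ */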
+static void
+free_remote_op(gpointer data)
+{
+ remote_fencing_op_t *op = data;
+
+ crm_log_xml_debug(op->request, "Destroying");
+
+ clear_remote_op_timers(op);
+
+ free(op->id);
+ free(op->action);
+ free(op->delegate);
+ free(op->target);
+ free(op->client_id);
+ free(op->client_name);
+ free(op->originator);
+
+ if (op->query_results) {
+ g_list_free_full(op->query_results, free_remote_query);
+ }
+ if (op->request) {
+ free_xml(op->request);
+ op->request = NULL;
+ }
+ if (op->devices_list) {
+ g_list_free_full(op->devices_list, free);
+ op->devices_list = NULL;
+ }
+ g_list_free_full(op->automatic_list, free);
+ g_list_free(op->duplicates);
+
+ pcmk__reset_result(&op->result);
+ free(op);
+}
+
+void
+init_stonith_remote_op_hash_table(GHashTable **table)
+{
+ if (*table == NULL) {
+ *table = pcmk__strkey_table(NULL, free_remote_op);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return an operation's originally requested action (before any remap)
+ *
+ * \param[in] op Operation to check
+ *
+ * \return Operation's original action
+ */
+static const char *
+op_requested_action(const remote_fencing_op_t *op)
+{
+ return ((op->phase > st_phase_requested)? "reboot" : op->action);
+}
+
+/*!
+ * \internal
+ * \brief Remap a "reboot" operation to the "off" phase
+ *
+ * \param[in,out] op Operation to remap
+ */
+static void
+op_phase_off(remote_fencing_op_t *op)
+{
+ crm_info("Remapping multiple-device reboot targeting %s to 'off' "
+ CRM_XS " id=%.8s", op->target, op->id);
+ op->phase = st_phase_off;
+
+ /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
+ * memory allocation at each phase.
+ */
+ strcpy(op->action, "off");
+}
+
+/*!
+ * \internal
+ * \brief Advance a remapped reboot operation to the "on" phase
+ *
+ * \param[in,out] op Operation to remap
+ */
+static void
+op_phase_on(remote_fencing_op_t *op)
+{
+ GList *iter = NULL;
+
+ crm_info("Remapped 'off' targeting %s complete, "
+ "remapping to 'on' for %s " CRM_XS " id=%.8s",
+ op->target, op->client_name, op->id);
+ op->phase = st_phase_on;
+ strcpy(op->action, "on");
+
+ /* Skip devices with automatic unfencing, because the cluster will handle it
+ * when the node rejoins.
+ */
+ for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
+ GList *match = g_list_find_custom(op->devices_list, iter->data,
+ sort_strings);
+
+ if (match) {
+ op->devices_list = g_list_remove(op->devices_list, match->data);
+ }
+ }
+ g_list_free_full(op->automatic_list, free);
+ op->automatic_list = NULL;
+
+ /* Rewind device list pointer */
+ op->devices = op->devices_list;
+}
+
+/*!
+ * \internal
+ * \brief Reset a remapped reboot operation
+ *
+ * \param[in,out] op Operation to reset
+ */
+static void
+undo_op_remap(remote_fencing_op_t *op)
+{
+ if (op->phase > 0) {
+ crm_info("Undoing remap of reboot targeting %s for %s "
+ CRM_XS " id=%.8s", op->target, op->client_name, op->id);
+ op->phase = st_phase_requested;
+ strcpy(op->action, "reboot");
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create notification data XML for a fencing operation result
+ *
+ * \param[in] op Fencer operation that completed
+ *
+ * \return Newly created XML to add as notification data
+ * \note The caller is responsible for freeing the result.
+ */
+static xmlNode *
+fencing_result2xml(const remote_fencing_op_t *op)
+{
+ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+
+ crm_xml_add_int(notify_data, "state", op->state);
+ crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
+ crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
+ crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
+ crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
+ crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
+ crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
+
+ return notify_data;
+}
+
+/*!
+ * \internal
+ * \brief Broadcast a fence result notification to all CPG peers
+ *
+ * \param[in] op Fencer operation that completed
+ * \param[in] op_merged Whether this operation is a duplicate of another
+ */
+void
+fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
+{
+ static int count = 0;
+ xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+ xmlNode *notify_data = fencing_result2xml(op);
+
+ count++;
+ crm_trace("Broadcasting result to peers");
+ crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
+ crm_xml_add(bcast, F_SUBTYPE, "broadcast");
+ crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
+ crm_xml_add_int(bcast, "count", count);
+
+ if (op_merged) {
+ pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
+ }
+
+ stonith__xe_set_result(notify_data, &op->result);
+
+ add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
+ send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
+ free_xml(notify_data);
+ free_xml(bcast);
+
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Reply to a local request originator and notify all subscribed clients
+ *
+ * \param[in,out] op Fencer operation that completed
+ * \param[in,out] data Top-level XML to add notification to
+ */
+static void
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
+{
+ xmlNode *notify_data = NULL;
+ xmlNode *reply = NULL;
+ pcmk__client_t *client = NULL;
+
+ if (op->notify_sent == TRUE) {
+ /* nothing to do */
+ return;
+ }
+
+ /* Do notification with a clean data object */
+ crm_xml_add_int(data, "state", op->state);
+ crm_xml_add(data, F_STONITH_TARGET, op->target);
+ crm_xml_add(data, F_STONITH_OPERATION, op->action);
+
+ reply = fenced_construct_reply(op->request, data, &op->result);
+ crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+
+ /* Send fencing OP reply to local client that initiated fencing */
+ client = pcmk__find_client_by_id(op->client_id);
+ if (client == NULL) {
+ crm_trace("Skipping reply to %s: no longer a client", op->client_id);
+ } else {
+ do_local_reply(reply, client, op->call_options);
+ }
+
+    /* Broadcast to all local clients that the fencing operation happened */
+ notify_data = fencing_result2xml(op);
+ fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
+ free_xml(notify_data);
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+
+    /* Mark this op as having its notifications already sent */
+ op->notify_sent = TRUE;
+ free_xml(reply);
+}
+
+/*!
+ * \internal
+ * \brief Finalize all duplicates of a given fencer operation
+ *
+ * \param[in,out] op Fencer operation that completed
+ * \param[in,out] data Top-level XML to add notification to
+ */
+static void
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
+{
+ for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
+ remote_fencing_op_t *other = iter->data;
+
+ if (other->state == st_duplicate) {
+ other->state = op->state;
+ crm_debug("Performing duplicate notification for %s@%s: %s "
+ CRM_XS " id=%.8s",
+ other->client_name, other->originator,
+ pcmk_exec_status_str(op->result.execution_status),
+ other->id);
+ pcmk__copy_result(&op->result, &other->result);
+ finalize_op(other, data, true);
+
+ } else {
+ // Possible if (for example) it timed out already
+ crm_err("Skipping duplicate notification for %s@%s "
+ CRM_XS " state=%s id=%.8s",
+ other->client_name, other->originator,
+ stonith_op_state_str(other->state), other->id);
+ }
+ }
+}
+
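+/*!
+ * \internal
+ * \brief Get the delegate from a reply's XML, or the reply's origin if the
+ *        delegate attribute is not present
+ *
+ * \param[in] xml  Reply XML to check
+ *
+ * \return Newly allocated string with delegate (or origin) node name
+ */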
+static char *
+delegate_from_xml(xmlNode *xml)
+{
+ xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
+
+ if (match == NULL) {
+ return crm_element_value_copy(xml, F_ORIG);
+ } else {
+ return crm_element_value_copy(match, F_STONITH_DELEGATE);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Finalize a peer fencing operation
+ *
+ * Clean up after a fencing operation completes. This function has two code
+ * paths: the executioner uses it to broadcast the result to CPG peers, and then
+ * each peer (including the executioner) uses it to process that broadcast and
+ * notify its IPC clients of the result.
+ *
+ * \param[in,out] op Fencer operation that completed
+ * \param[in,out] data If not NULL, XML reply of last delegated operation
+ * \param[in] dup Whether this operation is a duplicate of another
+ * (in which case, do not broadcast the result)
+ *
+ * \note The operation result should be set before calling this function.
+ */
+static void
+finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
+{
+ int level = LOG_ERR;
+ const char *subt = NULL;
+ xmlNode *local_data = NULL;
+ gboolean op_merged = FALSE;
+
+ CRM_CHECK((op != NULL), return);
+
+ // This is a no-op if timers have already been cleared
+ clear_remote_op_timers(op);
+
+ if (op->notify_sent) {
+ // Most likely, this is a timed-out action that eventually completed
+ crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
+ "Result arrived too late " CRM_XS " id=%.8s",
+ op->action, (op->target? " targeting " : ""),
+ (op->target? op->target : ""),
+ (op->delegate? op->delegate : "unknown node"),
+ op->client_name, op->originator,
+ (op_merged? " (merged)" : ""),
+ op->id);
+ return;
+ }
+
+ set_fencing_completed(op);
+ undo_op_remap(op);
+
+ if (data == NULL) {
+ data = create_xml_node(NULL, "remote-op");
+ local_data = data;
+
+ } else if (op->delegate == NULL) {
+ switch (op->result.execution_status) {
+ case PCMK_EXEC_NO_FENCE_DEVICE:
+ break;
+
+ case PCMK_EXEC_INVALID:
+ if (op->result.exit_status != CRM_EX_EXPIRED) {
+ op->delegate = delegate_from_xml(data);
+ }
+ break;
+
+ default:
+ op->delegate = delegate_from_xml(data);
+ break;
+ }
+ }
+
+ if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
+ op_merged = true;
+ }
+
+ /* Tell everyone the operation is done, we will continue
+ * with doing the local notifications once we receive
+ * the broadcast back. */
+ subt = crm_element_value(data, F_SUBTYPE);
+ if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+ /* Defer notification until the bcast message arrives */
+ fenced_broadcast_op_result(op, op_merged);
+ free_xml(local_data);
+ return;
+ }
+
+ if (pcmk__result_ok(&op->result) || dup
+ || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+ level = LOG_NOTICE;
+ }
+ do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
+ CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
+ (op->target? op->target : ""),
+ (op->delegate? op->delegate : "unknown node"),
+ op->client_name, op->originator,
+ (op_merged? " (merged)" : ""),
+ crm_exit_str(op->result.exit_status),
+ pcmk_exec_status_str(op->result.execution_status),
+ ((op->result.exit_reason == NULL)? "" : ": "),
+ ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
+ op->id);
+
+ handle_local_reply_and_notify(op, data);
+
+ if (!dup) {
+ finalize_op_duplicates(op, data);
+ }
+
+    /* Free non-essential parts of the record, but keep the record itself
+     * around so the history can still be queried
+     */
+ if (op->query_results) {
+ g_list_free_full(op->query_results, free_remote_query);
+ op->query_results = NULL;
+ }
+ if (op->request) {
+ free_xml(op->request);
+ op->request = NULL;
+ }
+
+ free_xml(local_data);
+}
+
+/*!
+ * \internal
+ * \brief Finalize a watchdog fencer op after the waiting time expires
+ *
+ * \param[in,out] userdata Fencer operation that completed
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */
+static gboolean
+remote_op_watchdog_done(gpointer userdata)
+{
+ remote_fencing_op_t *op = userdata;
+
+ op->op_timer_one = 0;
+
+ crm_notice("Self-fencing (%s) by %s for %s assumed complete "
+ CRM_XS " id=%.8s",
+ op->action, op->target, op->client_name, op->id);
+ op->state = st_done;
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ finalize_op(op, NULL, false);
+ return G_SOURCE_REMOVE;
+}
+
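+/*!
+ * \internal
+ * \brief Handle a peer's failure to act on a single device within timeout,
+ *        trying the next appropriate device, if any
+ *
+ * \param[in,out] userdata  Fencer operation that timed out
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */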
+static gboolean
+remote_op_timeout_one(gpointer userdata)
+{
+ remote_fencing_op_t *op = userdata;
+
+ op->op_timer_one = 0;
+
+ crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
+ " id=%.8s", op->action, op->target, op->client_name, op->id);
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+ "Peer did not return fence result within timeout");
+
+ // The requested delay has been applied for the first device
+ if (op->delay > 0) {
+ op->delay = 0;
+ crm_trace("Try another device for '%s' action targeting %s "
+ "for client %s without delay " CRM_XS " id=%.8s",
+ op->action, op->target, op->client_name, op->id);
+ }
+
+ // Try another device, if appropriate
+ request_peer_fencing(op, NULL);
+ return G_SOURCE_REMOVE;
+}
+
+/*!
+ * \internal
+ * \brief Finalize a remote fencer operation that timed out
+ *
+ * \param[in,out] op Fencer operation that timed out
+ * \param[in] reason Readable description of what step timed out
+ */
+static void
+finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
+{
+ crm_debug("Action '%s' targeting %s for client %s timed out "
+ CRM_XS " id=%.8s",
+ op->action, op->target, op->client_name, op->id);
+
+ if (op->phase == st_phase_on) {
+ /* A remapped reboot operation timed out in the "on" phase, but the
+ * "off" phase completed successfully, so quit trying any further
+ * devices, and return success.
+ */
+ op->state = st_done;
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ } else {
+ op->state = st_failed;
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+ }
+ finalize_op(op, NULL, false);
+}
+
+/*!
+ * \internal
+ * \brief Finalize a remote fencer operation that timed out
+ *
+ * \param[in,out] userdata Fencer operation that timed out
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */
+static gboolean
+remote_op_timeout(gpointer userdata)
+{
+ remote_fencing_op_t *op = userdata;
+
+ op->op_timer_total = 0;
+
+ if (op->state == st_done) {
+ crm_debug("Action '%s' targeting %s for client %s already completed "
+ CRM_XS " id=%.8s",
+ op->action, op->target, op->client_name, op->id);
+ } else {
+ finalize_timed_out_op(userdata, "Fencing did not complete within a "
+ "total timeout based on the "
+ "configured timeout and retries for "
+ "any devices attempted");
+ }
+ return G_SOURCE_REMOVE;
+}
+
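+/*!
+ * \internal
+ * \brief Handle expiration of the device-query timer: if any query results
+ *        arrived, attempt the actual fencing, otherwise finalize the
+ *        operation as timed out
+ *
+ * \param[in,out] data  Fencer operation whose query timed out
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */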
+static gboolean
+remote_op_query_timeout(gpointer data)
+{
+ remote_fencing_op_t *op = data;
+
+ op->query_timer = 0;
+
+ if (op->state == st_done) {
+ crm_debug("Operation %.8s targeting %s already completed",
+ op->id, op->target);
+ } else if (op->state == st_exec) {
+ crm_debug("Operation %.8s targeting %s already in progress",
+ op->id, op->target);
+ } else if (op->query_results) {
+ // Query succeeded, so attempt the actual fencing
+ crm_debug("Query %.8s targeting %s complete (state=%s)",
+ op->id, op->target, stonith_op_state_str(op->state));
+ request_peer_fencing(op, NULL);
+ } else {
+ crm_debug("Query %.8s targeting %s timed out (state=%s)",
+ op->id, op->target, stonith_op_state_str(op->state));
+ finalize_timed_out_op(op, "No capable peers replied to device query "
+ "within timeout");
+ }
+
+ return G_SOURCE_REMOVE;
+}
+
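+/*!
+ * \internal
+ * \brief Check whether a topology table entry has any levels defined
+ *
+ * \param[in] tp  Topology table entry to check (may be NULL)
+ *
+ * \return TRUE if \p tp is NULL or has no levels, otherwise FALSE
+ */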
+static gboolean
+topology_is_empty(stonith_topology_t *tp)
+{
+ int i;
+
+ if (tp == NULL) {
+ return TRUE;
+ }
+
+ for (i = 0; i < ST_LEVEL_MAX; i++) {
+ if (tp->levels[i] != NULL) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Add a device to an operation's automatic unfencing list
+ *
+ * \param[in,out] op Operation to modify
+ * \param[in] device Device ID to add
+ */
+static void
+add_required_device(remote_fencing_op_t *op, const char *device)
+{
+ GList *match = g_list_find_custom(op->automatic_list, device,
+ sort_strings);
+
+ if (!match) {
+ op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove a device from the automatic unfencing list
+ *
+ * \param[in,out] op Operation to modify
+ * \param[in] device Device ID to remove
+ */
+static void
+remove_required_device(remote_fencing_op_t *op, const char *device)
+{
+ GList *match = g_list_find_custom(op->automatic_list, device,
+ sort_strings);
+
+ if (match) {
+ op->automatic_list = g_list_remove(op->automatic_list, match->data);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Set an operation's device list to a deep copy of another list
+ *
+ * \param[in,out] op Operation to modify
+ * \param[in] devices Device list to copy
+ */
+static void
+set_op_device_list(remote_fencing_op_t * op, GList *devices)
+{
+ GList *lpc = NULL;
+
+ if (op->devices_list) {
+ g_list_free_full(op->devices_list, free);
+ op->devices_list = NULL;
+ }
+ for (lpc = devices; lpc != NULL; lpc = lpc->next) {
+ op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
+ }
+ op->devices = op->devices_list;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node matches a topology target
+ *
+ * \param[in] tp Topology table entry to check
+ * \param[in] node Name of node to check
+ *
+ * \return TRUE if node matches topology target
+ */
+static gboolean
+topology_matches(const stonith_topology_t *tp, const char *node)
+{
+ regex_t r_patt;
+
+ CRM_CHECK(node && tp && tp->target, return FALSE);
+ switch (tp->kind) {
+ case fenced_target_by_attribute:
+ /* This level targets by attribute, so tp->target is a NAME=VALUE pair
+ * of a permanent attribute applied to targeted nodes. The test below
+ * relies on the locally cached copy of the CIB, so if fencing is needed
+ * before the initial CIB is received or after a malformed CIB is
+ * received, the topology cannot be used.
+ */
+ if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
+ crm_notice("Matched %s with %s by attribute", node, tp->target);
+ return TRUE;
+ }
+ break;
+
+ case fenced_target_by_pattern:
+ /* This level targets node names matching a pattern, so tp->target
+ * (and tp->target_pattern) is a regular expression.
+ */
+ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
+ crm_info("Bad regex '%s' for fencing level", tp->target);
+ } else {
+ int status = regexec(&r_patt, node, 0, NULL, 0);
+
+ regfree(&r_patt);
+ if (status == 0) {
+ crm_notice("Matched %s with %s by name", node, tp->target);
+ return TRUE;
+ }
+ }
+ break;
+
+ case fenced_target_by_name:
+ crm_trace("Testing %s against %s", node, tp->target);
+ return pcmk__str_eq(tp->target, node, pcmk__str_casei);
+
+ default:
+ break;
+ }
+ crm_trace("No match for %s with %s", node, tp->target);
+ return FALSE;
+}
+
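+/* Editorial note: doxygen sketch added; behavior inferred from the lookup
+ * and matching logic in the function body below.
+ */
+/*!
+ * \internal
+ * \brief Find the topology table entry matching a given node
+ *
+ * \param[in] host Name of node to find an entry for
+ *
+ * \return Topology table entry matching \p host by name, pattern, or
+ * attribute, or NULL if none matches
+ */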
+stonith_topology_t *
+find_topology_for_host(const char *host)
+{
+ GHashTableIter tIter;
+ stonith_topology_t *tp = g_hash_table_lookup(topology, host);
+
+ if (tp != NULL) {
+ crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
+ return tp;
+ }
+
+ g_hash_table_iter_init(&tIter, topology);
+ while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
+ if (topology_matches(tp, host)) {
+ crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
+ return tp;
+ }
+ }
+
+ crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Set fencing operation's device list to target's next topology level
+ *
+ * \param[in,out] op Remote fencing operation to modify
+ * \param[in] empty_ok If true, an operation without a target (i.e.
+ * queries) or a target without a topology will get a
+ * pcmk_rc_ok return value instead of ENODEV
+ *
+ * \return Standard Pacemaker return value
+ */
+static int
+advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
+{
+ stonith_topology_t *tp = NULL;
+
+ if (op->target) {
+ tp = find_topology_for_host(op->target);
+ }
+ if (topology_is_empty(tp)) {
+ return empty_ok? pcmk_rc_ok : ENODEV;
+ }
+
+ CRM_ASSERT(tp->levels != NULL);
+
+ stonith__set_call_options(op->call_options, op->id, st_opt_topology);
+
+ /* This is a new level, so undo any remapping left over from the previous one */
+ undo_op_remap(op);
+
+ do {
+ op->level++;
+ } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
+
+ if (op->level < ST_LEVEL_MAX) {
+ crm_trace("Attempting fencing level %d targeting %s (%d devices) "
+ "for client %s@%s (id=%.8s)",
+ op->level, op->target, g_list_length(tp->levels[op->level]),
+ op->client_name, op->originator, op->id);
+ set_op_device_list(op, tp->levels[op->level]);
+
+ // The requested delay has been applied for the first fencing level
+ if (op->level > 1 && op->delay > 0) {
+ op->delay = 0;
+ }
+
+ if ((g_list_next(op->devices_list) != NULL)
+ && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) {
+ /* A reboot has been requested for a topology level with multiple
+ * devices. Instead of rebooting the devices sequentially, we will
+ * turn them all off, then turn them all on again. (Think about
+ * switched power outlets for redundant power supplies.)
+ */
+ op_phase_off(op);
+ }
+ return pcmk_rc_ok;
+ }
+
+ crm_info("All %sfencing options targeting %s for client %s@%s failed "
+ CRM_XS " id=%.8s",
+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
+ op->target, op->client_name, op->originator, op->id);
+ return ENODEV;
+}
+
+/*!
+ * \internal
+ * \brief If fencing operation is a duplicate, merge it into the other one
+ *
+ * \param[in,out] op Fencing operation to check
+ */
+static void
+merge_duplicates(remote_fencing_op_t *op)
+{
+ GHashTableIter iter;
+ remote_fencing_op_t *other = NULL;
+
+ time_t now = time(NULL);
+
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
+ const char *other_action = op_requested_action(other);
+
+ if (!strcmp(op->id, other->id)) {
+ continue; // Don't compare against self
+ }
+ if (other->state > st_exec) {
+ crm_trace("%.8s not duplicate of %.8s: not in progress",
+ op->id, other->id);
+ continue;
+ }
+ if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
+ crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
+ op->id, other->id, op->target, other->target);
+ continue;
+ }
+ if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
+ crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
+ op->id, other->id, op->action, other_action);
+ continue;
+ }
+ if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
+ crm_trace("%.8s not duplicate of %.8s: same client %s",
+ op->id, other->id, op->client_name);
+ continue;
+ }
+ if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
+ crm_trace("%.8s not duplicate of %.8s: suicide for %s",
+ op->id, other->id, other->target);
+ continue;
+ }
+ if (!fencing_peer_active(crm_get_peer(0, other->originator))) {
+ crm_notice("Failing action '%s' targeting %s originating from "
+ "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
+ other->action, other->target, other->client_name,
+ other->originator, other->id);
+ crm_trace("%.8s not duplicate of %.8s: originator dead",
+ op->id, other->id);
+ other->state = st_failed;
+ continue;
+ }
+ if ((other->total_timeout > 0)
+ && (now > (other->total_timeout + other->created))) {
+ crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
+ op->id, other->id, now, other->created,
+ other->total_timeout);
+ continue;
+ }
+
+ /* There is another in-flight request to fence the same host.
+ * Piggyback on that instead. If it fails, so do we.
+ */
+ other->duplicates = g_list_append(other->duplicates, op);
+ if (other->total_timeout == 0) {
+ other->total_timeout = op->total_timeout =
+ TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
+ crm_trace("Best guess as to timeout used for %.8s: %d",
+ other->id, other->total_timeout);
+ }
+ crm_notice("Merging fencing action '%s' targeting %s originating from "
+ "client %s with identical request from %s@%s "
+ CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
+ op->action, op->target, op->client_name,
+ other->client_name, other->originator,
+ op->id, other->id, other->total_timeout);
+ report_timeout_period(op, other->total_timeout);
+ op->state = st_duplicate;
+ }
+}
+
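+/* Editorial note: doxygen sketch added, describing the count performed in
+ * the function body below.
+ */
+/*!
+ * \internal
+ * \brief Count peer nodes able to participate in fencing
+ *
+ * \return Number of active peers in the cluster peer cache
+ */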
+static uint32_t
+fencing_active_peers(void)
+{
+ uint32_t count = 0;
+ crm_node_t *entry;
+ GHashTableIter gIter;
+
+ g_hash_table_iter_init(&gIter, crm_peer_cache);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+ if (fencing_peer_active(entry)) {
+ count++;
+ }
+ }
+ return count;
+}
+
+/*!
+ * \internal
+ * \brief Process a manual confirmation of a pending fence action
+ *
+ * \param[in] client IPC client that sent confirmation
+ * \param[in,out] msg Request XML with manual confirmation
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
+{
+ remote_fencing_op_t *op = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
+
+ CRM_CHECK(dev != NULL, return EPROTO);
+
+ crm_notice("Received manual confirmation that %s has been fenced",
+ pcmk__s(crm_element_value(dev, F_STONITH_TARGET),
+ "unknown target"));
+ op = initiate_remote_stonith_op(client, msg, TRUE);
+ if (op == NULL) {
+ return EPROTO;
+ }
+ op->state = st_done;
+ set_fencing_completed(op);
+ op->delegate = strdup("a human");
+
+ // For the fencer's purposes, the fencing operation is done
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ finalize_op(op, msg, false);
+
+ /* For the requester's purposes, the operation is still pending. The
+ * actual result will be sent asynchronously via the operation's done_cb().
+ */
+ return EINPROGRESS;
+}
+
+/*!
+ * \internal
+ * \brief Create a new remote stonith operation
+ *
+ * \param[in] client ID of local stonith client that initiated the operation
+ * \param[in] request The request from the client that started the operation
+ * \param[in] peer TRUE if this operation is owned by another stonith peer
+ * (an operation owned by one peer is stored on all peers,
+ * but only the owner executes it; all nodes get the results
+ * once the owner finishes execution)
+ *
+ * \return Newly created operation, or the existing operation if \p peer is
+ * TRUE and the request's operation ID is already known locally
+ */
+void *
+create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
+{
+ remote_fencing_op_t *op = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER);
+ int call_options = 0;
+ const char *operation = NULL;
+
+ init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+
+ /* If this operation is owned by another node, check to make
+ * sure we haven't already created this operation. */
+ if (peer && dev) {
+ const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+
+ CRM_CHECK(op_id != NULL, return NULL);
+
+ op = g_hash_table_lookup(stonith_remote_op_list, op_id);
+ if (op) {
+ crm_debug("Reusing existing remote fencing op %.8s for %s",
+ op_id, ((client == NULL)? "unknown client" : client));
+ return op;
+ }
+ }
+
+ op = calloc(1, sizeof(remote_fencing_op_t));
+ CRM_ASSERT(op != NULL);
+
+ crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
+ // Value -1 means disable any static/random fencing delays
+ crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
+
+ if (peer && dev) {
+ op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
+ } else {
+ op->id = crm_generate_uuid();
+ }
+
+ g_hash_table_replace(stonith_remote_op_list, op->id, op);
+
+ op->state = st_query;
+ op->replies_expected = fencing_active_peers();
+ op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
+ op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
+ op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
+ op->created = time(NULL);
+
+ if (op->originator == NULL) {
+ /* Local or relayed request */
+ op->originator = strdup(stonith_our_uname);
+ }
+
+ CRM_LOG_ASSERT(client != NULL);
+ if (client) {
+ op->client_id = strdup(client);
+ }
+
+ /* For a RELAY operation, record the fencer itself as the client. */
+ operation = crm_element_value(request, F_STONITH_OPERATION);
+
+ if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
+ op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
+ (unsigned long) getpid());
+ } else {
+ op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
+ }
+
+ op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
+ op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
+ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+ op->call_options = call_options;
+
+ crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
+
+ crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
+ "base timeout %d, %u %s expected)",
+ (peer && dev)? "Recorded" : "Generated", op->id, op->action,
+ op->target, op->client_name, op->base_timeout,
+ op->replies_expected,
+ pcmk__plural_alt(op->replies_expected, "reply", "replies"));
+
+ if (op->call_options & st_opt_cs_nodeid) {
+ int nodeid;
+ crm_node_t *node;
+
+ pcmk__scan_min_int(op->target, &nodeid, 0);
+ node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+
+ /* Ensure the conversion only happens once */
+ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
+
+ if (node && node->uname) {
+ free(op->target);
+ op->target = strdup(node->uname);
+
+ } else {
+ crm_warn("Could not expand nodeid '%s' into a host name", op->target);
+ }
+ }
+
+ /* Check whether this operation duplicates another in-flight operation */
+ merge_duplicates(op);
+
+ if (op->state != st_duplicate) {
+ /* kick history readers */
+ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ }
+
+ /* safe to trim as long as that doesn't touch pending ops */
+ stonith_fence_history_trim();
+
+ return op;
+}
+
+/*!
+ * \internal
+ * \brief Create a peer fencing operation from a request, and initiate it
+ *
+ * \param[in] client IPC client that made request (NULL to get from request)
+ * \param[in] request Request XML
+ * \param[in] manual_ack Whether this is a manual action confirmation
+ *
+ * \return Newly created operation on success, otherwise NULL
+ */
+remote_fencing_op_t *
+initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
+ gboolean manual_ack)
+{
+ int query_timeout = 0;
+ xmlNode *query = NULL;
+ const char *client_id = NULL;
+ remote_fencing_op_t *op = NULL;
+ const char *relay_op_id = NULL;
+ const char *operation = NULL;
+
+ if (client) {
+ client_id = client->id;
+ } else {
+ client_id = crm_element_value(request, F_STONITH_CLIENTID);
+ }
+
+ CRM_LOG_ASSERT(client_id != NULL);
+ op = create_remote_stonith_op(client_id, request, FALSE);
+ op->owner = TRUE;
+ if (manual_ack) {
+ return op;
+ }
+
+ CRM_CHECK(op->action, return NULL);
+
+ if (advance_topology_level(op, true) != pcmk_rc_ok) {
+ op->state = st_failed;
+ }
+
+ switch (op->state) {
+ case st_failed:
+ // advance_topology_level() exhausted levels
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "All topology levels failed");
+ crm_warn("Could not request peer fencing (%s) targeting %s "
+ CRM_XS " id=%.8s", op->action, op->target, op->id);
+ finalize_op(op, NULL, false);
+ return op;
+
+ case st_duplicate:
+ crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
+ CRM_XS " id=%.8s", op->action, op->target, op->id);
+ return op;
+
+ default:
+ crm_notice("Requesting peer fencing (%s) targeting %s "
+ CRM_XS " id=%.8s state=%s base_timeout=%d",
+ op->action, op->target, op->id,
+ stonith_op_state_str(op->state), op->base_timeout);
+ }
+
+ query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
+ NULL, op->call_options);
+
+ crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
+ crm_xml_add(query, F_STONITH_TARGET, op->target);
+ crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
+ crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
+ crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
+ crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
+
+ /* For a RELAY operation, add relay information to the query so that the
+ * original relayed operation can be deleted.
+ */
+ operation = crm_element_value(request, F_STONITH_OPERATION);
+ if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
+ relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
+ if (relay_op_id) {
+ crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
+ }
+ }
+
+ send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
+ free_xml(query);
+
+ query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
+ op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
+
+ return op;
+}
+
+enum find_best_peer_options {
+ /*! Skip checking the target peer for capable fencing devices */
+ FIND_PEER_SKIP_TARGET = 0x0001,
+ /*! Only check the target peer for capable fencing devices */
+ FIND_PEER_TARGET_ONLY = 0x0002,
+ /*! Skip peers and devices that are not verified */
+ FIND_PEER_VERIFIED_ONLY = 0x0004,
+};
+
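+/* Editorial note: doxygen sketch added; semantics inferred from the search
+ * loop in the function body below.
+ */
+/*!
+ * \internal
+ * \brief Search an operation's query results for a capable peer
+ *
+ * \param[in] device If not NULL, peer must have access to this device
+ * \param[in,out] op Operation whose query results should be searched
+ * \param[in] options Group of enum find_best_peer_options flags
+ *
+ * \return Matching peer information if found, otherwise NULL
+ */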
+static peer_device_info_t *
+find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
+{
+ GList *iter = NULL;
+ gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
+
+ if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
+ return NULL;
+ }
+
+ for (iter = op->query_results; iter != NULL; iter = iter->next) {
+ peer_device_info_t *peer = iter->data;
+
+ crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
+ peer->host, op->target, peer->ndevices,
+ pcmk__plural_s(peer->ndevices), peer->tried, options);
+ if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
+ continue;
+ }
+ if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
+ continue;
+ }
+
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+
+ if (grab_peer_device(op, peer, device, verified_devices_only)) {
+ return peer;
+ }
+
+ } else if (!peer->tried
+ && count_peer_devices(op, peer, verified_devices_only,
+ fenced_support_flag(op->action))) {
+ /* No topology: Use the current best peer */
+ crm_trace("Simple fencing");
+ return peer;
+ }
+ }
+
+ return NULL;
+}
+
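+/* Editorial note: doxygen sketch added, summarizing the selection order
+ * implemented in the function body below.
+ */
+/*!
+ * \internal
+ * \brief Choose the best peer to execute the next fencing step
+ *
+ * Prefer a verified peer other than the target, then any peer other than
+ * the target, and finally the target itself (except in the "on" phase of a
+ * remapped reboot), advancing through topology levels as needed.
+ *
+ * \param[in,out] op Fencing operation to choose a peer for
+ *
+ * \return Chosen peer, or NULL if none is currently suitable
+ */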
+static peer_device_info_t *
+stonith_choose_peer(remote_fencing_op_t * op)
+{
+ const char *device = NULL;
+ peer_device_info_t *peer = NULL;
+ uint32_t active = fencing_active_peers();
+
+ do {
+ if (op->devices) {
+ device = op->devices->data;
+ crm_trace("Checking for someone to fence (%s) %s using %s",
+ op->action, op->target, device);
+ } else {
+ crm_trace("Checking for someone to fence (%s) %s",
+ op->action, op->target);
+ }
+
+ /* Best choice is a peer other than the target with verified access */
+ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
+ if (peer) {
+ crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
+ return peer;
+ }
+
+ if (op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
+ crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
+ return NULL;
+ }
+
+ /* If no other peer has verified access, next best is unverified access */
+ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
+ if (peer) {
+ crm_trace("Found best unverified peer %s", peer->host);
+ return peer;
+ }
+
+ /* If no other peer can do it, last option is self-fencing
+ * (which is never allowed for the "on" phase of a remapped reboot)
+ */
+ if (op->phase != st_phase_on) {
+ peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
+ if (peer) {
+ crm_trace("%s will fence itself", peer->host);
+ return peer;
+ }
+ }
+
+ /* Try the next fencing level if there is one (unless we're in the "on"
+ * phase of a remapped "reboot", because we ignore errors in that case)
+ */
+ } while ((op->phase != st_phase_on)
+ && pcmk_is_set(op->call_options, st_opt_topology)
+ && (advance_topology_level(op, false) == pcmk_rc_ok));
+
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__is_fencing_action(op->action)
+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ crm_info("Couldn't contact watchdog-fencing target-node (%s)",
+ op->target);
+ /* check_watchdog_fencing_and_wait will log additional info */
+ } else {
+ crm_notice("Couldn't find anyone to fence (%s) %s using %s",
+ op->action, op->target, (device? device : "any device"));
+ }
+ return NULL;
+}
+
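+/* Editorial note: doxygen sketch added; behavior inferred from the function
+ * body below.
+ */
+/*!
+ * \internal
+ * \brief Get the timeout to use for a device on a given peer
+ *
+ * \param[in] op Fencing operation being executed
+ * \param[in] peer Peer that would execute the device (can be NULL)
+ * \param[in] device Device ID (can be NULL)
+ * \param[in] with_delay Whether to add any configured fencing delay
+ *
+ * \return The device's custom action timeout if known, otherwise the
+ * operation's base timeout, plus any delay if requested
+ */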
+static int
+get_device_timeout(const remote_fencing_op_t *op,
+ const peer_device_info_t *peer, const char *device,
+ bool with_delay)
+{
+ device_properties_t *props;
+ int delay = 0;
+
+ if (!peer || !device) {
+ return op->base_timeout;
+ }
+
+ props = g_hash_table_lookup(peer->devices, device);
+ if (!props) {
+ return op->base_timeout;
+ }
+
+ // op->delay < 0 means disable any static/random fencing delays
+ if (with_delay && op->delay >= 0) {
+ // delay_base is eventually limited by delay_max
+ delay = (props->delay_max[op->phase] > 0 ?
+ props->delay_max[op->phase] : props->delay_base[op->phase]);
+ }
+
+ return (props->custom_action_timeout[op->phase]?
+ props->custom_action_timeout[op->phase] : op->base_timeout)
+ + delay;
+}
+
+struct timeout_data {
+ const remote_fencing_op_t *op;
+ const peer_device_info_t *peer;
+ int total_timeout;
+};
+
+/*!
+ * \internal
+ * \brief Add timeout to a total if device has not been executed yet
+ *
+ * \param[in] key GHashTable key (device ID)
+ * \param[in] value GHashTable value (device properties)
+ * \param[in,out] user_data Timeout data
+ */
+static void
+add_device_timeout(gpointer key, gpointer value, gpointer user_data)
+{
+ const char *device_id = key;
+ device_properties_t *props = value;
+ struct timeout_data *timeout = user_data;
+
+ if (!props->executed[timeout->op->phase]
+ && !props->disallowed[timeout->op->phase]) {
+ timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
+ device_id, true);
+ }
+}
+
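+/* Editorial note: doxygen sketch added; behavior inferred from the function
+ * body below.
+ */
+/*!
+ * \internal
+ * \brief Calculate a peer's total timeout over its unexecuted devices
+ *
+ * \param[in] op Fencing operation being executed
+ * \param[in] peer Peer whose devices should be totaled
+ *
+ * \return Sum of timeouts of \p peer's remaining devices, or the
+ * operation's base timeout if that sum is zero
+ */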
+static int
+get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
+{
+ struct timeout_data timeout;
+
+ timeout.op = op;
+ timeout.peer = peer;
+ timeout.total_timeout = 0;
+
+ g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
+
+ return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
+}
+
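+/* Editorial note: doxygen sketch added, summarizing the cases handled in
+ * the function body below.
+ */
+/*!
+ * \internal
+ * \brief Calculate a total timeout for an entire fencing operation
+ *
+ * For topology-based fencing, sum the timeouts of every device at every
+ * level; otherwise use the chosen peer's timeout or the base timeout.
+ *
+ * \param[in] op Fencing operation being executed
+ * \param[in] chosen_peer Peer chosen to execute the operation (can be NULL)
+ *
+ * \return Total timeout (in seconds) for \p op
+ */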
+static int
+get_op_total_timeout(const remote_fencing_op_t *op,
+ const peer_device_info_t *chosen_peer)
+{
+ int total_timeout = 0;
+ stonith_topology_t *tp = find_topology_for_host(op->target);
+
+ if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
+ int i;
+ GList *device_list = NULL;
+ GList *iter = NULL;
+ GList *auto_list = NULL;
+
+ if (pcmk__str_eq(op->action, "on", pcmk__str_none)
+ && (op->automatic_list != NULL)) {
+ auto_list = g_list_copy(op->automatic_list);
+ }
+
+ /* Yep, this looks scary, nested loops all over the place.
+ * Here is what is going on.
+ * Loop1: Iterate through fencing levels.
+ * Loop2: If a fencing level has devices, loop through each device
+ * Loop3: For each device in a fencing level, see what peer owns it
+ * and what that peer has reported the timeout is for the device.
+ */
+ for (i = 0; i < ST_LEVEL_MAX; i++) {
+ if (!tp->levels[i]) {
+ continue;
+ }
+ for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
+ /* In case of a watchdog device, add the timeout to the budget
+ * regardless of whether we got a reply.
+ */
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__is_fencing_action(op->action)
+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
+ pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ total_timeout += stonith_watchdog_timeout_ms / 1000;
+ continue;
+ }
+
+ for (iter = op->query_results; iter != NULL; iter = iter->next) {
+ const peer_device_info_t *peer = iter->data;
+
+ if (auto_list) {
+ GList *match = g_list_find_custom(auto_list, device_list->data,
+ sort_strings);
+ if (match) {
+ auto_list = g_list_remove(auto_list, match->data);
+ }
+ }
+
+ if (find_peer_device(op, peer, device_list->data,
+ fenced_support_flag(op->action))) {
+ total_timeout += get_device_timeout(op, peer,
+ device_list->data,
+ true);
+ break;
+ }
+ } /* End Loop3: match device with peer that owns device, find device's timeout period */
+ } /* End Loop2: iterate through devices at a specific level */
+ } /*End Loop1: iterate through fencing levels */
+
+ // Add timeouts for any remaining automatic-unfencing devices
+ if (auto_list) {
+ for (iter = auto_list; iter != NULL; iter = iter->next) {
+ GList *iter2 = NULL;
+
+ for (iter2 = op->query_results; iter2 != NULL; iter2 = iter2->next) {
+ peer_device_info_t *peer = iter2->data;
+ if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
+ total_timeout += get_device_timeout(op, peer,
+ iter->data, true);
+ break;
+ }
+ }
+ }
+ }
+
+ g_list_free(auto_list);
+
+ } else if (chosen_peer) {
+ total_timeout = get_peer_timeout(op, chosen_peer);
+ } else {
+ total_timeout = op->base_timeout;
+ }
+
+ /* Take any requested fencing delay into account to prevent it from eating
+ * up the total timeout.
+ */
+ return ((total_timeout ? total_timeout : op->base_timeout)
+ + (op->delay > 0 ? op->delay : 0));
+}
+
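+/* Editorial note: doxygen sketch added; behavior inferred from the function
+ * body below.
+ */
+/*!
+ * \internal
+ * \brief Report a fencing operation's timeout to the requesting client
+ *
+ * Relay the timeout to the client's node if needed, and repeat for any
+ * merged duplicate operations.
+ *
+ * \param[in,out] op Fencing operation to report timeout for
+ * \param[in] op_timeout Timeout (in seconds) to report
+ */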
+static void
+report_timeout_period(remote_fencing_op_t * op, int op_timeout)
+{
+ GList *iter = NULL;
+ xmlNode *update = NULL;
+ const char *client_node = NULL;
+ const char *client_id = NULL;
+ const char *call_id = NULL;
+
+ if (op->call_options & st_opt_sync_call) {
+ /* There is no reason to report the timeout for a synchronous call: the
+ * client is blocked waiting for the response and cannot act on a
+ * reported timeout. This update only matters for asynchronous calls,
+ * which receive their results via a callback. */
+ return;
+ } else if (!op->request) {
+ return;
+ }
+
+ crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
+ client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
+ call_id = crm_element_value(op->request, F_STONITH_CALLID);
+ client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
+ if (!client_node || !call_id || !client_id) {
+ return;
+ }
+
+ if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
+ // Client is connected to this node, so send update directly to them
+ do_stonith_async_timeout_update(client_id, call_id, op_timeout);
+ return;
+ }
+
+ /* The client is connected to another node; relay this update to it */
+ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
+ crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
+ crm_xml_add(update, F_STONITH_CLIENTID, client_id);
+ crm_xml_add(update, F_STONITH_CALLID, call_id);
+ crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
+
+ send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
+
+ free_xml(update);
+
+ for (iter = op->duplicates; iter != NULL; iter = iter->next) {
+ remote_fencing_op_t *dup = iter->data;
+
+ crm_trace("Reporting timeout for duplicate %.8s to client %s",
+ dup->id, dup->client_name);
+ report_timeout_period(iter->data, op_timeout);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Advance an operation to the next device in its topology
+ *
+ * \param[in,out] op Fencer operation to advance
+ * \param[in] device ID of device that just completed
+ * \param[in,out] msg If not NULL, XML reply of last delegated operation
+ */
+static void
+advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+ xmlNode *msg)
+{
+ /* Advance to the next device at this topology level, if any */
+ if (op->devices) {
+ op->devices = op->devices->next;
+ }
+
+ /* Handle automatic unfencing if an "on" action was requested */
+ if ((op->phase == st_phase_requested)
+ && pcmk__str_eq(op->action, "on", pcmk__str_none)) {
+ /* If the device we just executed was required, it's not anymore */
+ remove_required_device(op, device);
+
+ /* If there are no more devices at this topology level, run through any
+ * remaining devices with automatic unfencing
+ */
+ if (op->devices == NULL) {
+ op->devices = op->automatic_list;
+ }
+ }
+
+ if ((op->devices == NULL) && (op->phase == st_phase_off)) {
+ /* We're done with this level and with required devices, but we had
+ * remapped "reboot" to "off", so start over with "on". If any devices
+ * need to be turned back on, op->devices will be non-NULL after this.
+ */
+ op_phase_on(op);
+ }
+
+ // This function is only called if the previous device succeeded
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+ if (op->devices) {
+ /* Necessary devices remain, so execute the next one */
+ crm_trace("Next targeting %s on behalf of %s@%s",
+ op->target, op->client_name, op->originator);
+
+ // The requested delay has been applied for the first device
+ if (op->delay > 0) {
+ op->delay = 0;
+ }
+
+ request_peer_fencing(op, NULL);
+ } else {
+ /* We're done with all devices and phases, so finalize operation */
+ crm_trace("Marking complex fencing op targeting %s as complete",
+ op->target);
+ op->state = st_done;
+ finalize_op(op, msg, false);
+ }
+}
+
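+/* Editorial note: doxygen sketch added; behavior inferred from the function
+ * body below.
+ */
+/*!
+ * \internal
+ * \brief Start a watchdog timer if the target self-fences via watchdog
+ *
+ * \param[in,out] op Fencing operation whose target may use watchdog fencing
+ *
+ * \return TRUE if a watchdog self-fencing timer was started, else FALSE
+ */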
+static gboolean
+check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
+{
+ if (node_does_watchdog_fencing(op->target)) {
+
+ crm_notice("Waiting %lds for %s to self-fence (%s) for "
+ "client %s " CRM_XS " id=%.8s",
+ (stonith_watchdog_timeout_ms / 1000),
+ op->target, op->action, op->client_name, op->id);
+
+ if (op->op_timer_one) {
+ g_source_remove(op->op_timer_one);
+ }
+ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
+ remote_op_watchdog_done, op);
+ return TRUE;
+ } else {
+ crm_debug("Skipping fallback to watchdog-fencing as %s is "
+ "not in host-list", op->target);
+ }
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Ask a peer to execute a fencing operation
+ *
+ * \param[in,out] op Fencing operation to be executed
+ * \param[in,out] peer If NULL or topology is in use, choose best peer to
+ * execute the fencing, otherwise use this peer
+ */
+static void
+request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
+{
+ const char *device = NULL;
+ int timeout;
+
+ CRM_CHECK(op != NULL, return);
+
+ crm_trace("Action %.8s targeting %s for %s is %s",
+ op->id, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+
+ if ((op->phase == st_phase_on) && (op->devices != NULL)) {
+ /* We are in the "on" phase of a remapped topology reboot. If this
+ * device has pcmk_reboot_action="off", or doesn't support the "on"
+ * action, skip it.
+ *
+ * We can't check device properties at this point because we haven't
+ * chosen a peer for this stage yet. Instead, we check the local node's
+ * knowledge about the device. If different versions of the fence agent
+ * are installed on different nodes, there's a chance this could be
+ * mistaken, but the worst that could happen is we don't try turning the
+ * node back on when we should.
+ */
+ device = op->devices->data;
+ if (pcmk__str_eq(fenced_device_reboot_action(device), "off",
+ pcmk__str_none)) {
+ crm_info("Not turning %s back on using %s because the device is "
+ "configured to stay off (pcmk_reboot_action='off')",
+ op->target, device);
+ advance_topology_device_in_level(op, device, NULL);
+ return;
+ }
+ if (!fenced_device_supports_on(device)) {
+ crm_info("Not turning %s back on using %s because the agent "
+ "doesn't support 'on'", op->target, device);
+ advance_topology_device_in_level(op, device, NULL);
+ return;
+ }
+ }
+
+ timeout = op->base_timeout;
+ if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
+ peer = stonith_choose_peer(op);
+ }
+
+ if (!op->op_timer_total) {
+ op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
+ op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
+ report_timeout_period(op, op->total_timeout);
+ crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
+ CRM_XS "id=%.8s",
+ op->total_timeout, op->target, op->client_name, op->id);
+ }
+
+ if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
+ /* Ignore the caller's peer preference if topology is in use, because
+ * that peer might not have access to the required device. With
+ * topology, stonith_choose_peer() removes the device from further
+ * consideration, so the timeout must be calculated beforehand.
+ *
+ * @TODO Basing the total timeout on the caller's preferred peer (above)
+ * is less than ideal.
+ */
+ peer = stonith_choose_peer(op);
+
+ device = op->devices->data;
+ /* The fencing timeout sent to the peer takes no delay into account.
+ * The peer will add a dedicated timer for any delay in
+ * schedule_stonith_command().
+ */
+ timeout = get_device_timeout(op, peer, device, false);
+ }
+
+ if (peer) {
+ /* Take any requested fencing delay into account to prevent it from eating
+ * up the timeout.
+ */
+ int timeout_one = (op->delay > 0 ?
+ TIMEOUT_MULTIPLY_FACTOR * op->delay : 0);
+ xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
+
+ crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
+ crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
+ crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
+ crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
+ crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
+ crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
+ crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
+ crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
+
+ if (device) {
+ timeout_one += TIMEOUT_MULTIPLY_FACTOR *
+ get_device_timeout(op, peer, device, true);
+ crm_notice("Requesting that %s perform '%s' action targeting %s "
+ "using %s " CRM_XS " for client %s (%ds)",
+ peer->host, op->action, op->target, device,
+ op->client_name, timeout_one);
+ crm_xml_add(remote_op, F_STONITH_DEVICE, device);
+
+ } else {
+ timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
+ crm_notice("Requesting that %s perform '%s' action targeting %s "
+ CRM_XS " for client %s (%ds, %lds)",
+ peer->host, op->action, op->target, op->client_name,
+ timeout_one, stonith_watchdog_timeout_ms);
+ }
+
+ op->state = st_exec;
+ if (op->op_timer_one) {
+ g_source_remove(op->op_timer_one);
+ op->op_timer_one = 0;
+ }
+
+ if (!((stonith_watchdog_timeout_ms > 0)
+ && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+ || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
+ && pcmk__is_fencing_action(op->action)))
+ && check_watchdog_fencing_and_wait(op))) {
+
+ /* Some thoughts about self-fencing cases reaching this point:
+ - The check in check_watchdog_fencing_and_wait() shouldn't fail
+ if STONITH_WATCHDOG_ID is chosen as the fencing device, since
+ its presence implies watchdog-fencing is enabled anyway.
+ - If watchdog-fencing is disabled, either in general or for a
+ specific target (detected in check_watchdog_fencing_and_wait()),
+ we can't expect a success reply for other kinds of self-fencing,
+ but a timeout is fine if the node doesn't come back in between.
+ - The delicate case is watchdog-fencing being enabled for a node
+ while the watchdog device isn't explicitly chosen for
+ self-fencing. Local scheduler execution in sbd may detect the
+ node as unclean and lead to timely self-fencing; otherwise the
+ selected stonith-watchdog-timeout is at least questionable.
+ */
+
+ /* We're not waiting for a watchdog timeout here, so engage the timer
+ * with the timeout evaluated above. */
+ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
+ }
+
+ send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
+ peer->tried = TRUE;
+ free_xml(remote_op);
+ return;
+
+ } else if (op->phase == st_phase_on) {
+ /* A remapped "on" cannot be executed, but the node was already
+ * turned off successfully, so ignore the error and continue.
+ */
+ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
+ "after successful 'off'", device, op->target);
+ advance_topology_device_in_level(op, device, NULL);
+ return;
+
+ } else if (op->owner == FALSE) {
+ crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
+ op->action, op->target, op->client_name);
+
+ } else if (op->query_timer == 0) {
+ /* We've exhausted all available peers */
+ crm_info("No remaining peers capable of fencing (%s) %s for client %s "
+ CRM_XS " state=%s", op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+ CRM_CHECK(op->state < st_done, return);
+ finalize_timed_out_op(op, "All nodes failed, or are unable, to "
+ "fence target");
+
+ } else if (op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
+ /* if the operation never left the query state,
+ * but we have all the expected replies, then no devices
+ * are available to execute the fencing operation. */
+
+ if (stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
+ STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
+ if (check_watchdog_fencing_and_wait(op)) {
+ return;
+ }
+ }
+
+ if (op->state == st_query) {
+ crm_info("No peers (out of %d) have devices capable of fencing "
+ "(%s) %s for client %s " CRM_XS " state=%s",
+ op->replies, op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ } else {
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ }
+ /* ... else use existing result from previous failed attempt
+ * (topology is not in use, and no devices remain to be attempted).
+ * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
+ * prevent finalize_op() from setting the correct delegate if
+ * needed.
+ */
+
+ crm_info("No peers (out of %d) are capable of fencing (%s) %s "
+ "for client %s " CRM_XS " state=%s",
+ op->replies, op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+ }
+
+ op->state = st_failed;
+ finalize_op(op, NULL, false);
+
+ } else {
+ crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
+ "for client %s " CRM_XS " id=%.8s",
+ op->action, op->target, (device? " using " : ""),
+ (device? device : ""), op->client_name, op->id);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Comparison function for sorting query results
+ *
+ * \param[in] a GList item to compare
+ * \param[in] b GList item to compare
+ *
+ * \return Per the glib documentation, "a negative integer if the first value
+ * comes before the second, 0 if they are equal, or a positive integer
+ * if the first value comes after the second."
+ */
+static gint
+sort_peers(gconstpointer a, gconstpointer b)
+{
+ const peer_device_info_t *peer_a = a;
+ const peer_device_info_t *peer_b = b;
+
+ return (peer_b->ndevices - peer_a->ndevices);
+}
+
+/*!
+ * \internal
+ * \brief Check whether all of a target's topology devices were found
+ *
+ * \param[in] op Fencing operation with topology to check
+ *
+ * \return TRUE if every device at every topology level appears in some
+ * peer's query results, otherwise FALSE
+ */
+static gboolean
+all_topology_devices_found(const remote_fencing_op_t *op)
+{
+ GList *device = NULL;
+ GList *iter = NULL;
+ device_properties_t *match = NULL;
+ stonith_topology_t *tp = NULL;
+ gboolean skip_target = FALSE;
+ int i;
+
+ tp = find_topology_for_host(op->target);
+ if (!tp) {
+ return FALSE;
+ }
+ if (pcmk__is_fencing_action(op->action)) {
+ /* Don't count the devices on the target node if we are killing
+ * the target node. */
+ skip_target = TRUE;
+ }
+
+ for (i = 0; i < ST_LEVEL_MAX; i++) {
+ for (device = tp->levels[i]; device; device = device->next) {
+ match = NULL;
+ for (iter = op->query_results; iter && !match; iter = iter->next) {
+ peer_device_info_t *peer = iter->data;
+
+ if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
+ continue;
+ }
+ match = find_peer_device(op, peer, device->data, st_device_supports_none);
+ }
+ if (!match) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Parse action-specific device properties from XML
+ *
+ * \param[in] xml XML element containing the properties
+ * \param[in] peer Name of peer that sent XML (for logs)
+ * \param[in] device Device ID (for logs)
+ * \param[in] action Action the properties relate to (for logs)
+ * \param[in,out] op Fencing operation that properties are being parsed for
+ * \param[in] phase Phase the properties relate to
+ * \param[in,out] props Device properties to update
+ */
+static void
+parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
+ const char *action, remote_fencing_op_t *op,
+ enum st_remap_phase phase, device_properties_t *props)
+{
+ props->custom_action_timeout[phase] = 0;
+ crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
+ &props->custom_action_timeout[phase]);
+ if (props->custom_action_timeout[phase]) {
+ crm_trace("Peer %s with device %s returned %s action timeout %d",
+ peer, device, action, props->custom_action_timeout[phase]);
+ }
+
+ props->delay_max[phase] = 0;
+ crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
+ if (props->delay_max[phase]) {
+ crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
+ peer, device, props->delay_max[phase], action);
+ }
+
+ props->delay_base[phase] = 0;
+ crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
+ if (props->delay_base[phase]) {
+ crm_trace("Peer %s with device %s returned base delay %d for %s",
+ peer, device, props->delay_base[phase], action);
+ }
+
+ /* Handle devices with automatic unfencing */
+ if (pcmk__str_eq(action, "on", pcmk__str_none)) {
+ int required = 0;
+
+ crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
+ if (required) {
+ crm_trace("Peer %s requires device %s to execute for action %s",
+ peer, device, action);
+ add_required_device(op, device);
+ }
+ }
+
+ /* If a reboot is remapped to off+on, it's possible that a node is allowed
+ * to perform one action but not another.
+ */
+ if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) {
+ props->disallowed[phase] = TRUE;
+ crm_trace("Peer %s is disallowed from executing %s for device %s",
+ peer, action, device);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Parse one device's properties from peer's XML query reply
+ *
+ * \param[in] xml XML node containing device properties
+ * \param[in,out] op Operation that query and reply relate to
+ * \param[in,out] peer Peer's device information
+ * \param[in] device ID of device being parsed
+ */
+static void
+add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
+ peer_device_info_t *peer, const char *device)
+{
+ xmlNode *child;
+ int verified = 0;
+ device_properties_t *props = calloc(1, sizeof(device_properties_t));
+ int flags = st_device_supports_on; /* Peers that don't set the flag are assumed to support the "on" action */
+
+ /* Add a new entry to this peer's devices list */
+ CRM_ASSERT(props != NULL);
+ g_hash_table_insert(peer->devices, strdup(device), props);
+
+ /* Peers with verified (monitored) access will be preferred */
+ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
+ if (verified) {
+ crm_trace("Peer %s has confirmed a verified device %s",
+ peer->host, device);
+ props->verified = TRUE;
+ }
+
+ crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags);
+ props->device_support_flags = flags;
+
+ /* Parse action-specific device properties */
+ parse_action_specific(xml, peer->host, device, op_requested_action(op),
+ op, st_phase_requested, props);
+ for (child = pcmk__xml_first_child(xml); child != NULL;
+ child = pcmk__xml_next(child)) {
+ /* Replies for "reboot" operations will include the action-specific
+ * values for "off" and "on" in child elements, just in case the reboot
+ * winds up getting remapped.
+ */
+ if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) {
+ parse_action_specific(child, peer->host, device, "off",
+ op, st_phase_off, props);
+ } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) {
+ parse_action_specific(child, peer->host, device, "on",
+ op, st_phase_on, props);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Parse a peer's XML query reply and add it to operation's results
+ *
+ * \param[in,out] op Operation that query and reply relate to
+ * \param[in] host Name of peer that sent this reply
+ * \param[in] ndevices Number of devices expected in reply
+ * \param[in] xml XML node containing device list
+ *
+ * \return Newly allocated result structure with parsed reply
+ */
+static peer_device_info_t *
+add_result(remote_fencing_op_t *op, const char *host, int ndevices,
+ const xmlNode *xml)
+{
+ peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
+ xmlNode *child;
+
+ // cppcheck seems not to understand the abort logic in CRM_CHECK
+ // cppcheck-suppress memleak
+ CRM_CHECK(peer != NULL, return NULL);
+ peer->host = strdup(host);
+ peer->devices = pcmk__strkey_table(free, free);
+
+ /* Each child element describes one capable device available to the peer */
+ for (child = pcmk__xml_first_child(xml); child != NULL;
+ child = pcmk__xml_next(child)) {
+ const char *device = ID(child);
+
+ if (device) {
+ add_device_properties(child, op, peer, device);
+ }
+ }
+
+ peer->ndevices = g_hash_table_size(peer->devices);
+ CRM_CHECK(ndevices == peer->ndevices,
+ crm_err("Query claimed to have %d device%s but %d found",
+ ndevices, pcmk__plural_s(ndevices), peer->ndevices));
+
+ op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
+ return peer;
+}
+
+/*!
+ * \internal
+ * \brief Handle a peer's reply to our fencing query
+ *
+ * Parse a query result from XML and store it in the remote operation
+ * table, and when enough replies have been received, issue a fencing request.
+ *
+ * \param[in] msg XML reply received
+ *
+ * \return pcmk_ok on success, -errno on error
+ *
+ * \note See initiate_remote_stonith_op() for how the XML query was initially
+ * formed, and stonith_query() for how the peer formed its XML reply.
+ */
+int
+process_remote_stonith_query(xmlNode *msg)
+{
+ int ndevices = 0;
+ gboolean host_is_target = FALSE;
+ gboolean have_all_replies = FALSE;
+ const char *id = NULL;
+ const char *host = NULL;
+ remote_fencing_op_t *op = NULL;
+ peer_device_info_t *peer = NULL;
+ uint32_t replies_expected;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+
+ CRM_CHECK(dev != NULL, return -EPROTO);
+
+ id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+ CRM_CHECK(id != NULL, return -EPROTO);
+
+ dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
+ CRM_CHECK(dev != NULL, return -EPROTO);
+ crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
+
+ op = g_hash_table_lookup(stonith_remote_op_list, id);
+ if (op == NULL) {
+ crm_debug("Received query reply for unknown or expired operation %s",
+ id);
+ return -EOPNOTSUPP;
+ }
+
+ replies_expected = fencing_active_peers();
+ if (op->replies_expected < replies_expected) {
+ replies_expected = op->replies_expected;
+ }
+ if ((++op->replies >= replies_expected) && (op->state == st_query)) {
+ have_all_replies = TRUE;
+ }
+ host = crm_element_value(msg, F_ORIG);
+ host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
+
+ crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
+ op->replies, replies_expected, host,
+ op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
+ if (ndevices > 0) {
+ peer = add_result(op, host, ndevices, dev);
+ }
+
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+ /* If we start the fencing before all the topology results are in,
+ * it is possible fencing levels will be skipped because of the missing
+ * query results. */
+ if (op->state == st_query && all_topology_devices_found(op)) {
+ /* All the query results are in for the topology, start the fencing ops. */
+ crm_trace("All topology devices found");
+ request_peer_fencing(op, peer);
+
+ } else if (have_all_replies) {
+ crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
+ replies_expected, op->replies);
+ request_peer_fencing(op, NULL);
+ }
+
+ } else if (op->state == st_query) {
+ int nverified = count_peer_devices(op, peer, TRUE,
+ fenced_support_flag(op->action));
+
+ /* We have a result for a non-topology fencing op that looks promising,
+ * so go ahead and start fencing before the query timeout */
+ if ((peer != NULL) && !host_is_target && nverified) {
+ /* we have a verified device living on a peer that is not the target */
+ crm_trace("Found %d verified device%s",
+ nverified, pcmk__plural_s(nverified));
+ request_peer_fencing(op, peer);
+
+ } else if (have_all_replies) {
+ crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
+ replies_expected, op->replies);
+ request_peer_fencing(op, NULL);
+
+ } else {
+ crm_trace("Waiting for more peer results before launching fencing operation");
+ }
+
+ } else if ((peer != NULL) && (op->state == st_done)) {
+ crm_info("Discarding query result from %s (%d device%s): "
+ "Operation is %s", peer->host,
+ peer->ndevices, pcmk__plural_s(peer->ndevices),
+ stonith_op_state_str(op->state));
+ }
+
+ return pcmk_ok;
+}
+
+/*!
+ * \internal
+ * \brief Handle a peer's reply to a fencing request
+ *
+ * Parse a fencing reply from XML, and either finalize the operation
+ * or attempt another device as appropriate.
+ *
+ * \param[in] msg XML reply received
+ */
+void
+fenced_process_fencing_reply(xmlNode *msg)
+{
+ const char *id = NULL;
+ const char *device = NULL;
+ remote_fencing_op_t *op = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ CRM_CHECK(dev != NULL, return);
+
+ id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+ CRM_CHECK(id != NULL, return);
+
+ dev = stonith__find_xe_with_result(msg);
+ CRM_CHECK(dev != NULL, return);
+
+ stonith__xe_get_result(dev, &result);
+
+ device = crm_element_value(dev, F_STONITH_DEVICE);
+
+ if (stonith_remote_op_list) {
+ op = g_hash_table_lookup(stonith_remote_op_list, id);
+ }
+
+ if ((op == NULL) && pcmk__result_ok(&result)) {
+ /* Record successful fencing operations */
+ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
+
+ op = create_remote_stonith_op(client_id, dev, TRUE);
+ }
+
+ if (op == NULL) {
+ /* Could be for an event that began before we started */
+ /* TODO: Record the op for later querying */
+ crm_info("Received peer result of unknown or expired operation %s", id);
+ pcmk__reset_result(&result);
+ return;
+ }
+
+ pcmk__reset_result(&op->result);
+ op->result = result; // The operation takes ownership of the result
+
+ if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
+ crm_err("Received outdated reply for device %s (instead of %s) to "
+ "fence (%s) %s. Operation already timed out at peer level.",
+ device, (const char *) op->devices->data, op->action, op->target);
+ return;
+ }
+
+ if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+ if (pcmk__result_ok(&op->result)) {
+ op->state = st_done;
+ } else {
+ op->state = st_failed;
+ }
+ finalize_op(op, msg, false);
+ return;
+
+ } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+ /* If this isn't a remote level broadcast, and we are not the
+ * originator of the operation, we should not be receiving this msg. */
+ crm_err("Received non-broadcast fencing result for operation %.8s "
+ "we do not own (device %s targeting %s)",
+ op->id, device, op->target);
+ return;
+ }
+
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+ const char *device = NULL;
+ const char *reason = op->result.exit_reason;
+
+ /* We own the op, and it is complete. Broadcast the result to all nodes
+ * and notify our local clients. */
+ if (op->state == st_done) {
+ finalize_op(op, msg, false);
+ return;
+ }
+
+ device = crm_element_value(msg, F_STONITH_DEVICE);
+
+ if ((op->phase == st_phase_on) && !pcmk__result_ok(&op->result)) {
+ /* A remapped "on" failed, but the node was already turned off
+ * successfully, so ignore the error and continue.
+ */
+ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
+ "after successful 'off'",
+ device, pcmk_exec_status_str(op->result.execution_status),
+ (reason == NULL)? "" : ": ",
+ (reason == NULL)? "" : reason,
+ op->target);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ } else {
+ crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
+ "%s%s%s%s",
+ op->action, op->target,
+ ((device == NULL)? "" : " using "),
+ ((device == NULL)? "" : device),
+ op->client_name,
+ op->originator,
+ pcmk_exec_status_str(op->result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+ }
+
+ if (pcmk__result_ok(&op->result)) {
+ /* An operation completed successfully. Try another device if
+ * necessary, otherwise mark the operation as done. */
+ advance_topology_device_in_level(op, device, msg);
+ return;
+ } else {
+ /* This device failed, so try another topology level. If no other
+ * levels are available, mark the operation failed and report results. */
+ if (advance_topology_level(op, false) != pcmk_rc_ok) {
+ op->state = st_failed;
+ finalize_op(op, msg, false);
+ return;
+ }
+ }
+
+ } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
+ op->state = st_done;
+ finalize_op(op, msg, false);
+ return;
+
+ } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
+ && (op->devices == NULL)) {
+ /* If the operation timed out don't bother retrying other peers. */
+ op->state = st_failed;
+ finalize_op(op, msg, false);
+ return;
+
+ } else {
+ /* fall through and attempt the fencing action using another peer */
+ }
+
+ /* Retry on failure */
+ crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+ op->target, op->originator, op->client_name,
+ pcmk_exec_status_str(op->result.execution_status));
+ request_peer_fencing(op, NULL);
+}
+
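+/* Editorial note: doxygen sketch added; behavior inferred from the function
+ * body below.
+ */
+/*!
+ * \internal
+ * \brief Check whether a target was recently fenced within a tolerance
+ *
+ * \param[in] tolerance Number of seconds that a prior success stays valid
+ * \param[in] target Name of node to check for a recent fencing success
+ * \param[in] action Fencing action to check for
+ *
+ * \return TRUE if \p target was successfully fenced with \p action within
+ * the past \p tolerance seconds, otherwise FALSE
+ */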
+gboolean
+stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
+{
+ GHashTableIter iter;
+ time_t now = time(NULL);
+ remote_fencing_op_t *rop = NULL;
+
+ if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
+ action == NULL) {
+ return FALSE;
+ }
+
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
+ if (strcmp(rop->target, target) != 0) {
+ continue;
+ } else if (rop->state != st_done) {
+ continue;
+ /* We don't have to worry about remapped reboots here
+ * because if state is done, any remapping has been undone
+ */
+ } else if (strcmp(rop->action, action) != 0) {
+ continue;
+ } else if ((rop->completed + tolerance) < now) {
+ continue;
+ }
+
+ crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
+ target, action, tolerance, rop->delegate, rop->originator);
+ return TRUE;
+ }
+ return FALSE;
+}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
new file mode 100644
index 0000000..4edda6c
--- /dev/null
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -0,0 +1,1751 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h> // PRIu32, PRIx32
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/output_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <crm/common/mainloop.h>
+
+#include <crm/cib/internal.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include <pacemaker-fenced.h>
+
+#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
+
+char *stonith_our_uname = NULL;
+long stonith_watchdog_timeout_ms = 0;
+GList *stonith_watchdog_targets = NULL;
+
+static GMainLoop *mainloop = NULL;
+
+gboolean stand_alone = FALSE;
+static gboolean stonith_shutdown_flag = FALSE;
+
+static qb_ipcs_service_t *ipcs = NULL;
+static xmlNode *local_cib = NULL;
+static pe_working_set_t *fenced_data_set = NULL;
+static const unsigned long long data_set_flags = pe_flag_quick_location
+ | pe_flag_no_compat
+ | pe_flag_no_counts;
+
+static cib_t *cib_api = NULL;
+
+static pcmk__output_t *logger_out = NULL;
+static pcmk__output_t *out = NULL;
+
+pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static struct {
+ bool no_cib_connect;
+ gchar **log_files;
+} options;
+
+static crm_exit_t exit_code = CRM_EX_OK;
+
+static void stonith_shutdown(int nsig);
+static void stonith_cleanup(void);
+
+static int32_t
+st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ if (stonith_shutdown_flag) {
+ crm_info("Ignoring new client [%d] during shutdown",
+ pcmk__client_pid(c));
+ return -EPERM;
+ }
+
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Exit code means? */
+static int32_t
+st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ int call_options = 0;
+ xmlNode *request = NULL;
+ pcmk__client_t *c = pcmk__find_client(qbc);
+ const char *op = NULL;
+
+ if (c == NULL) {
+ crm_info("Invalid client: %p", qbc);
+ return 0;
+ }
+
+ request = pcmk__client_data2xml(c, data, &id, &flags);
+ if (request == NULL) {
+ pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+ }
+
+ op = crm_element_value(request, F_CRM_TASK);
+ if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
+ crm_xml_add(request, F_TYPE, T_STONITH_NG);
+ crm_xml_add(request, F_STONITH_OPERATION, op);
+ crm_xml_add(request, F_STONITH_CLIENTID, c->id);
+ crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
+ crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
+
+ send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
+ free_xml(request);
+ return 0;
+ }
+
+ if (c->name == NULL) {
+ const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
+
+ if (value == NULL) {
+ value = "unknown";
+ }
+ c->name = crm_strdup_printf("%s.%u", value, c->pid);
+ }
+
+ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+ crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
+ " from client %s", flags, call_options, id, pcmk__client_name(c));
+
+ if (pcmk_is_set(call_options, st_opt_sync_call)) {
+ CRM_ASSERT(flags & crm_ipc_client_response);
+ CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
+ c->request_id = id; /* Reply only to the last one */
+ }
+
+ crm_xml_add(request, F_STONITH_CLIENTID, c->id);
+ crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
+ crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
+
+ crm_log_xml_trace(request, "ipc-received");
+ stonith_command(c, id, flags, request, NULL);
+
+ free_xml(request);
+ return 0;
+}
+
+/* Error code means? */
+static int32_t
+st_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ crm_trace("Connection %p closed", c);
+ pcmk__free_client(client);
+
+ /* 0 means: yes, go ahead and destroy the connection */
+ return 0;
+}
+
+static void
+st_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p destroyed", c);
+ st_ipc_closed(c);
+}
+
+static void
+stonith_peer_callback(xmlNode * msg, void *private_data)
+{
+ const char *remote_peer = crm_element_value(msg, F_ORIG);
+ const char *op = crm_element_value(msg, F_STONITH_OPERATION);
+
+ if (pcmk__str_eq(op, "poke", pcmk__str_none)) {
+ return;
+ }
+
+ crm_log_xml_trace(msg, "Peer[inbound]");
+ stonith_command(NULL, 0, 0, msg, remote_peer);
+}
+
+#if SUPPORT_COROSYNC
+static void
+stonith_peer_ais_callback(cpg_handle_t handle,
+ const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ xmlNode *xml = NULL;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+ if(data == NULL) {
+ return;
+ }
+ if (kind == crm_class_cluster) {
+ xml = string2xml(data);
+ if (xml == NULL) {
+ crm_err("Invalid XML: '%.120s'", data);
+ free(data);
+ return;
+ }
+ crm_xml_add(xml, F_ORIG, from);
+ /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
+ stonith_peer_callback(xml, NULL);
+ }
+
+ free_xml(xml);
+ free(data);
+}
+
+static void
+stonith_peer_cs_destroy(gpointer user_data)
+{
+ crm_crit("Lost connection to cluster layer, shutting down");
+ stonith_shutdown(0);
+}
+#endif
+
+void
+do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options)
+{
+ /* send callback to originating child */
+ int local_rc = pcmk_rc_ok;
+ int rid = 0;
+ uint32_t ipc_flags = crm_ipc_server_event;
+
+ if (pcmk_is_set(call_options, st_opt_sync_call)) {
+ CRM_LOG_ASSERT(client->request_id);
+ rid = client->request_id;
+ client->request_id = 0;
+ ipc_flags = crm_ipc_flags_none;
+ }
+
+ local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
+ if (local_rc == pcmk_rc_ok) {
+ crm_trace("Sent response %d to client %s",
+ rid, pcmk__client_name(client));
+ } else {
+ crm_warn("%synchronous reply to client %s failed: %s",
+ (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
+ pcmk__client_name(client), pcmk_rc_str(local_rc));
+ }
+}
+
+uint64_t
+get_stonith_flag(const char *name)
+{
+ if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
+ return st_callback_notify_fence;
+
+ } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
+ return st_callback_device_add;
+
+ } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
+ return st_callback_device_del;
+
+ } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) {
+ return st_callback_notify_history;
+
+ } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) {
+ return st_callback_notify_history_synced;
+
+ }
+ return st_callback_unknown;
+}
+
+static void
+stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *update_msg = user_data;
+ pcmk__client_t *client = value;
+ const char *type = NULL;
+
+ CRM_CHECK(client != NULL, return);
+ CRM_CHECK(update_msg != NULL, return);
+
+ type = crm_element_value(update_msg, F_SUBTYPE);
+ CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
+
+ if (client->ipcs == NULL) {
+ crm_trace("Skipping client with NULL channel");
+ return;
+ }
+
+ if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
+ int rc = pcmk__ipc_send_xml(client, 0, update_msg,
+ crm_ipc_server_event);
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("%s notification of client %s failed: %s "
+ CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
+ pcmk_rc_str(rc), client->id, rc);
+ } else {
+ crm_trace("Sent %s notification to client %s",
+ type, pcmk__client_name(client));
+ }
+ }
+}
+
+void
+do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
+{
+ pcmk__client_t *client = NULL;
+ xmlNode *notify_data = NULL;
+
+ if (!timeout || !call_id || !client_id) {
+ return;
+ }
+
+ client = pcmk__find_client_by_id(client_id);
+ if (!client) {
+ return;
+ }
+
+ notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
+ crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
+ crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
+ crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
+
+ crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
+
+    pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
+
+ free_xml(notify_data);
+}
+
+/*!
+ * \internal
+ * \brief Notify relevant IPC clients of a fencing operation result
+ *
+ * \param[in] type Notification type
+ * \param[in] result Result of fencing operation (assume success if NULL)
+ * \param[in] data If not NULL, add to notification as call data
+ */
+void
+fenced_send_notification(const char *type, const pcmk__action_result_t *result,
+ xmlNode *data)
+{
+ /* TODO: Standardize the contents of data */
+ xmlNode *update_msg = create_xml_node(NULL, "notify");
+
+ CRM_LOG_ASSERT(type != NULL);
+
+ crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
+ crm_xml_add(update_msg, F_SUBTYPE, type);
+ crm_xml_add(update_msg, F_STONITH_OPERATION, type);
+ stonith__xe_set_result(update_msg, result);
+
+ if (data != NULL) {
+ add_message_xml(update_msg, F_STONITH_CALLDATA, data);
+ }
+
+ crm_trace("Notifying clients");
+ pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
+ free_xml(update_msg);
+ crm_trace("Notify complete");
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a configuration change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_DEVICE_ADD,
+ * STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
+ * STONITH_OP_LEVEL_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc Description of what changed
+ * \param[in] active Current number of devices or topologies in use
+ */
+static void
+send_config_notification(const char *op, const pcmk__action_result_t *result,
+ const char *desc, int active)
+{
+ xmlNode *notify_data = create_xml_node(NULL, op);
+
+ CRM_CHECK(notify_data != NULL, return);
+
+ crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
+ crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
+
+ fenced_send_notification(op, result, notify_data);
+ free_xml(notify_data);
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a device change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_DEVICE_ADD or
+ * STONITH_OP_DEVICE_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc ID of device that changed
+ */
+void
+fenced_send_device_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc)
+{
+ send_config_notification(op, result, desc, g_hash_table_size(device_list));
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a topology level change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_LEVEL_ADD or
+ * STONITH_OP_LEVEL_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc String representation of level (<target>[<level_index>])
+ */
+void
+fenced_send_level_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc)
+{
+ send_config_notification(op, result, desc, g_hash_table_size(topology));
+}
+
+static void
+topology_remove_helper(const char *node, int level)
+{
+ char *desc = NULL;
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
+
+ crm_xml_add(data, F_STONITH_ORIGIN, __func__);
+ crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
+ crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
+
+ fenced_unregister_level(data, &desc, &result);
+ fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
+ pcmk__reset_result(&result);
+ free_xml(data);
+ free(desc);
+}
+
+static void
+remove_cib_device(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *rsc_id = NULL;
+ const char *standard = NULL;
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if(match != NULL) {
+ standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
+ }
+
+ if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ continue;
+ }
+
+ rsc_id = crm_element_value(match, XML_ATTR_ID);
+
+ stonith_device_remove(rsc_id, true);
+ }
+}
+
+static void
+remove_topology_level(xmlNode *match)
+{
+ int index = 0;
+ char *key = NULL;
+
+ CRM_CHECK(match != NULL, return);
+
+ key = stonith_level_key(match, fenced_target_by_unknown);
+ crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+ topology_remove_helper(key, index);
+ free(key);
+}
+
+static void
+add_topology_level(xmlNode *match)
+{
+ char *desc = NULL;
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ CRM_CHECK(match != NULL, return);
+
+ fenced_register_level(match, &desc, &result);
+ fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
+ pcmk__reset_result(&result);
+ free(desc);
+}
+
+static void
+remove_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if (match && crm_element_value(match, XML_DIFF_MARKER)) {
+ /* Deletion */
+ int index = 0;
+ char *target = stonith_level_key(match, fenced_target_by_unknown);
+
+ crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+ if (target == NULL) {
+ crm_err("Invalid fencing target in element %s", ID(match));
+
+ } else if (index <= 0) {
+ crm_err("Invalid level for %s in element %s", target, ID(match));
+
+ } else {
+ topology_remove_helper(target, index);
+ }
+ /* } else { Deal with modifications during the 'addition' stage */
+ }
+ }
+}
+
+static void
+register_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ remove_topology_level(match);
+ add_topology_level(match);
+ }
+}
+
+/* Fencing
+<diff crm_feature_set="3.0.6">
+ <diff-removed>
+ <fencing-topology>
+ <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
+ <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
+ <fencing-level devices="disk,network" id="f-p2.1"/>
+ </fencing-topology>
+ </diff-removed>
+ <diff-added>
+ <fencing-topology>
+ <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
+ <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
+ <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
+ </fencing-topology>
+ </diff-added>
+</diff>
+*/
+
+static void
+fencing_topology_init(void)
+{
+ xmlXPathObjectPtr xpathObj = NULL;
+ const char *xpath = "//" XML_TAG_FENCING_LEVEL;
+
+ crm_trace("Full topology refresh");
+ free_topology_list();
+ init_topology_list();
+
+ /* Grab everything */
+ xpathObj = xpath_search(local_cib, xpath);
+ register_fencing_topology(xpathObj);
+
+ freeXpathObject(xpathObj);
+}
+
+#define rsc_name(x) ((x)->clone_name? (x)->clone_name : (x)->id)
+
+/*!
+ * \internal
+ * \brief Check whether our uname is in a resource's allowed node list
+ *
+ * \param[in] rsc Resource to check
+ *
+ * \return Pointer to node object if found, NULL otherwise
+ */
+static pe_node_t *
+our_node_allowed_for(const pe_resource_t *rsc)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ if (rsc && stonith_our_uname) {
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
+ break;
+ }
+ node = NULL;
+ }
+ }
+ return node;
+}
+
+static void
+watchdog_device_update(void)
+{
+ if (stonith_watchdog_timeout_ms > 0) {
+ if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
+ !stonith_watchdog_targets) {
+            /* If we get here, watchdog fencing is enabled, no watchdog device
+             * exists yet, and stonith_watchdog_targets is not what is
+             * preventing its creation
+             */
+ int rc;
+ xmlNode *xml;
+
+ xml = create_device_registration_xml(
+ STONITH_WATCHDOG_ID,
+ st_namespace_internal,
+ STONITH_WATCHDOG_AGENT,
+ NULL, /* stonith_device_register will add our
+ own name as PCMK_STONITH_HOST_LIST param
+ so we can skip that here
+ */
+ NULL);
+ rc = stonith_device_register(xml, TRUE);
+ free_xml(xml);
+ if (rc != pcmk_ok) {
+ rc = pcmk_legacy2rc(rc);
+ exit_code = CRM_EX_FATAL;
+ crm_crit("Cannot register watchdog pseudo fence agent: %s",
+ pcmk_rc_str(rc));
+ stonith_shutdown(0);
+ }
+ }
+
+ } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) {
+        /* Be silent if there is no such device (TODO: add a parameter to
+         * stonith_device_remove for this) */
+ stonith_device_remove(STONITH_WATCHDOG_ID, true);
+ }
+}
+
+static void
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
+{
+ long timeout_ms = 0;
+ xmlNode *stonith_watchdog_xml = NULL;
+ const char *value = NULL;
+
+ stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
+ cib, LOG_NEVER);
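+    /* As an illustration (hypothetical CIB snippet), the search above matches
+     * a cluster property stored as:
+     *   <nvpair id="..." name="stonith-watchdog-timeout" value="10s"/>
+     */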
+ if (stonith_watchdog_xml) {
+ value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
+ }
+ if (value) {
+ timeout_ms = crm_get_msec(value);
+ }
+
+ if (timeout_ms < 0) {
+ timeout_ms = pcmk__auto_watchdog_timeout();
+ }
+
+ stonith_watchdog_timeout_ms = timeout_ms;
+}
+
+/*!
+ * \internal
+ * \brief If a resource or any of its children are STONITH devices, update their
+ * definitions given a cluster working set.
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in,out] data_set Cluster working set with device information
+ */
+static void
+cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
+{
+ pe_node_t *node = NULL;
+ const char *value = NULL;
+ const char *rclass = NULL;
+ pe_node_t *parent = NULL;
+
+ /* If this is a complex resource, check children rather than this resource itself. */
+ if(rsc->children) {
+ GList *gIter = NULL;
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ cib_device_update(gIter->data, data_set);
+ if(pe_rsc_is_clone(rsc)) {
+ crm_trace("Only processing one copy of the clone %s", rsc->id);
+ break;
+ }
+ }
+ return;
+ }
+
+ /* We only care about STONITH resources. */
+ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+ if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ return;
+ }
+
+ /* If this STONITH resource is disabled, remove it. */
+ if (pe__resource_is_disabled(rsc)) {
+ crm_info("Device %s has been disabled", rsc->id);
+ return;
+ }
+
+    /* If watchdog fencing is disabled, handle any watchdog-fencing
+     * resource as if it were disabled
+     */
+ if ((stonith_watchdog_timeout_ms <= 0) &&
+ pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
+        crm_info("Watchdog fencing is disabled, so handling "
+                 "device %s as disabled", rsc->id);
+ return;
+ }
+
+ /* Check whether our node is allowed for this resource (and its parent if in a group) */
+ node = our_node_allowed_for(rsc);
+ if (rsc->parent && (rsc->parent->variant == pe_group)) {
+ parent = our_node_allowed_for(rsc->parent);
+ }
+
+ if(node == NULL) {
+ /* Our node is disallowed, so remove the device */
+ GHashTableIter iter;
+
+ crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ crm_trace("Available: %s = %d", pe__node_name(node), node->weight);
+ }
+
+ return;
+
+ } else if(node->weight < 0 || (parent && parent->weight < 0)) {
+ /* Our node (or its group) is disallowed by score, so remove the device */
+ int score = (node->weight < 0)? node->weight : parent->weight;
+
+ crm_info("Device %s has been disabled on %s: score=%s",
+ rsc->id, stonith_our_uname, pcmk_readable_score(score));
+ return;
+
+ } else {
+ /* Our node is allowed, so update the device information */
+ int rc;
+ xmlNode *data;
+ GHashTable *rsc_params = NULL;
+ GHashTableIter gIter;
+ stonith_key_value_t *params = NULL;
+
+ const char *name = NULL;
+ const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
+ const char *rsc_provides = NULL;
+
+ crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
+ rsc_params = pe_rsc_params(rsc, node, data_set);
+ get_meta_attributes(rsc->meta, rsc, node, data_set);
+
+ rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES);
+
+ g_hash_table_iter_init(&gIter, rsc_params);
+ while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
+ if (!name || !value) {
+ continue;
+ }
+ params = stonith_key_value_add(params, name, value);
+ crm_trace(" %s=%s", name, value);
+ }
+
+ data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
+ agent, params, rsc_provides);
+ stonith_key_value_freeall(params, 1, 1);
+ rc = stonith_device_register(data, TRUE);
+ CRM_ASSERT(rc == pcmk_ok);
+ free_xml(data);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update all STONITH device definitions based on current CIB
+ */
+static void
+cib_devices_update(void)
+{
+ GHashTableIter iter;
+ stonith_device_t *device = NULL;
+
+ crm_info("Updating devices to version %s.%s.%s",
+ crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
+ crm_element_value(local_cib, XML_ATTR_GENERATION),
+ crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
+
+ if (fenced_data_set->now != NULL) {
+ crm_time_free(fenced_data_set->now);
+ fenced_data_set->now = NULL;
+ }
+ fenced_data_set->localhost = stonith_our_uname;
+ pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set);
+
+ g_hash_table_iter_init(&iter, device_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
+ if (device->cib_registered) {
+ device->dirty = TRUE;
+ }
+ }
+
+    /* Repopulate the list if the CIB has a watchdog-fencing resource.
+     * TODO: keep a cached list for queries that happen while we are refreshing
+     */
+ g_list_free_full(stonith_watchdog_targets, free);
+ stonith_watchdog_targets = NULL;
+ g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set);
+
+ g_hash_table_iter_init(&iter, device_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
+ if (device->dirty) {
+ g_hash_table_iter_remove(&iter);
+ }
+ }
+
+ fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
+ pe_reset_working_set(fenced_data_set);
+}
+
+static void
+update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
+{
+ xmlNode *change = NULL;
+ char *reason = NULL;
+ bool needs_update = FALSE;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ for (change = pcmk__xml_first_child(patchset); change != NULL;
+ change = pcmk__xml_next(change)) {
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+ const char *shortpath = NULL;
+
+ if ((op == NULL) ||
+ (strcmp(op, "move") == 0) ||
+ strstr(xpath, "/"XML_CIB_TAG_STATUS)) {
+ continue;
+ } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) {
+ const char *rsc_id = NULL;
+ char *search = NULL;
+ char *mutable = NULL;
+
+ if (strstr(xpath, XML_TAG_ATTR_SETS) ||
+ strstr(xpath, XML_TAG_META_SETS)) {
+ needs_update = TRUE;
+ pcmk__str_update(&reason,
+ "(meta) attribute deleted from resource");
+ break;
+ }
+ pcmk__str_update(&mutable, xpath);
+ rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'");
+ if (rsc_id != NULL) {
+ rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'");
+ search = strchr(rsc_id, '\'');
+ }
+ if (search != NULL) {
+ *search = 0;
+ stonith_device_remove(rsc_id, true);
+                /* watchdog_device_update() is called afterward to fall back
+                 * to the implicit definition if needed */
+ } else {
+ crm_warn("Ignoring malformed CIB update (resource deletion)");
+ }
+ free(mutable);
+
+ } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) ||
+ strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) ||
+ strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) {
+            shortpath = strrchr(xpath, '/');
+            CRM_ASSERT(shortpath);
+ reason = crm_strdup_printf("%s %s", op, shortpath+1);
+ needs_update = TRUE;
+ break;
+ }
+ }
+
+ if(needs_update) {
+ crm_info("Updating device list from CIB: %s", reason);
+ cib_devices_update();
+ } else {
+ crm_trace("No updates for device list found in CIB");
+ }
+ free(reason);
+}
+
+
+static void
+update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
+{
+ const char *reason = "none";
+ gboolean needs_update = FALSE;
+ xmlXPathObjectPtr xpath_obj = NULL;
+
+ /* process new constraints */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
+ if (numXpathResults(xpath_obj) > 0) {
+ int max = numXpathResults(xpath_obj), lpc = 0;
+
+ /* Safest and simplest to always recompute */
+ needs_update = TRUE;
+ reason = "new location constraint";
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpath_obj, lpc);
+
+ crm_log_xml_trace(match, "new constraint");
+ }
+ }
+ freeXpathObject(xpath_obj);
+
+ /* process deletions */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
+ if (numXpathResults(xpath_obj) > 0) {
+ remove_cib_device(xpath_obj);
+ }
+ freeXpathObject(xpath_obj);
+
+ /* process additions */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
+ if (numXpathResults(xpath_obj) > 0) {
+ int max = numXpathResults(xpath_obj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *rsc_id = NULL;
+ const char *standard = NULL;
+ xmlNode *match = getXpathResult(xpath_obj, lpc);
+
+ rsc_id = crm_element_value(match, XML_ATTR_ID);
+ standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
+
+ if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ continue;
+ }
+
+ crm_trace("Fencing resource %s was added or modified", rsc_id);
+ reason = "new resource";
+ needs_update = TRUE;
+ }
+ }
+ freeXpathObject(xpath_obj);
+
+ if(needs_update) {
+ crm_info("Updating device list from CIB: %s", reason);
+ cib_devices_update();
+ }
+}
+
+static void
+update_cib_stonith_devices(const char *event, xmlNode * msg)
+{
+ int format = 1;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ CRM_ASSERT(patchset);
+ crm_element_value_int(patchset, "format", &format);
+ switch(format) {
+ case 1:
+ update_cib_stonith_devices_v1(event, msg);
+ break;
+ case 2:
+ update_cib_stonith_devices_v2(event, msg);
+ break;
+ default:
+ crm_warn("Unknown patch format: %d", format);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node has a specific attribute name/value
+ *
+ * \param[in] node Name of node to check
+ * \param[in] name Name of an attribute to look for
+ * \param[in] value Value that the named attribute must have to be considered a match
+ *
+ * \return TRUE if the locally cached CIB has the specified node attribute
+ */
+gboolean
+node_has_attr(const char *node, const char *name, const char *value)
+{
+ GString *xpath = NULL;
+ xmlNode *match;
+
+ CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL)
+ && (value != NULL), return FALSE);
+
+ /* Search for the node's attributes in the CIB. While the schema allows
+ * multiple sets of instance attributes, and allows instance attributes to
+ * use id-ref to reference values elsewhere, that is intended for resources,
+ * so we ignore that here.
+ */
+ xpath = g_string_sized_new(256);
+ pcmk__g_strcat(xpath,
+ "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
+ "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS
+ "/" XML_CIB_TAG_NVPAIR
+ "[@" XML_NVPAIR_ATTR_NAME "='", name, "' "
+ "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL);
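+    /* As an illustration (hypothetical inputs, with the usual expansions of
+     * the XML name macros), node="node1", name="standby", value="on" builds
+     * an XPath like:
+     *   //nodes/node[@uname='node1']/instance_attributes
+     *       /nvpair[@name='standby' and @value='on']
+     */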
+
+ match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER);
+
+ g_string_free(xpath, TRUE);
+ return (match != NULL);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node does watchdog-fencing
+ *
+ * \param[in] node Name of node to check
+ *
+ * \return TRUE if the node is found in stonith_watchdog_targets, or if
+ *         stonith_watchdog_targets is empty (indicating that all nodes
+ *         do watchdog fencing)
+ */
+gboolean
+node_does_watchdog_fencing(const char *node)
+{
+ return ((stonith_watchdog_targets == NULL) ||
+ pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
+}
+
+static void
+update_fencing_topology(const char *event, xmlNode * msg)
+{
+ int format = 1;
+ const char *xpath;
+ xmlXPathObjectPtr xpathObj = NULL;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ CRM_ASSERT(patchset);
+ crm_element_value_int(patchset, "format", &format);
+
+ if(format == 1) {
+ /* Process deletions (only) */
+ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
+ xpathObj = xpath_search(msg, xpath);
+
+ remove_fencing_topology(xpathObj);
+ freeXpathObject(xpathObj);
+
+ /* Process additions and changes */
+ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
+ xpathObj = xpath_search(msg, xpath);
+
+ register_fencing_topology(xpathObj);
+ freeXpathObject(xpathObj);
+
+ } else if(format == 2) {
+ xmlNode *change = NULL;
+ int add[] = { 0, 0, 0 };
+ int del[] = { 0, 0, 0 };
+
+ xml_patch_versions(patchset, add, del);
+
+ for (change = pcmk__xml_first_child(patchset); change != NULL;
+ change = pcmk__xml_next(change)) {
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+
+ if(op == NULL) {
+ continue;
+
+ } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
+ /* Change to a specific entry */
+
+ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
+ if(strcmp(op, "move") == 0) {
+ continue;
+
+ } else if(strcmp(op, "create") == 0) {
+ add_topology_level(change->children);
+
+ } else if(strcmp(op, "modify") == 0) {
+ xmlNode *match = first_named_child(change, XML_DIFF_RESULT);
+
+ if(match) {
+ remove_topology_level(match->children);
+ add_topology_level(match->children);
+ }
+
+ } else if(strcmp(op, "delete") == 0) {
+ /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
+ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+ }
+
+ } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
+ /* Change to the topology in general */
+ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
+                /* Changes to the whole config section, possibly including the topology as a whole */
+ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
+ crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
+ op, add[0], add[1], add[2], xpath);
+
+ } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
+ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+ }
+
+ } else {
+ crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ }
+ }
+
+ } else {
+ crm_warn("Unknown patch format: %d", format);
+ }
+}
+
+static bool have_cib_devices = FALSE;
+
+static void
+update_cib_cache_cb(const char *event, xmlNode * msg)
+{
+ int rc = pcmk_ok;
+ long timeout_ms_saved = stonith_watchdog_timeout_ms;
+ bool need_full_refresh = false;
+
+ if(!have_cib_devices) {
+ crm_trace("Skipping updates until we get a full dump");
+ return;
+
+ } else if(msg == NULL) {
+ crm_trace("Missing %s update", event);
+ return;
+ }
+
+ /* Maintain a local copy of the CIB so that we have full access
+ * to device definitions, location constraints, and node attributes
+ */
+ if (local_cib != NULL) {
+ int rc = pcmk_ok;
+ xmlNode *patchset = NULL;
+
+ crm_element_value_int(msg, F_CIB_RC, &rc);
+ if (rc != pcmk_ok) {
+ return;
+ }
+
+ patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+ out->message(out, "xml-patchset", patchset);
+ rc = xml_apply_patchset(local_cib, patchset, TRUE);
+ switch (rc) {
+ case pcmk_ok:
+ case -pcmk_err_old_data:
+ break;
+ case -pcmk_err_diff_resync:
+ case -pcmk_err_diff_failed:
+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
+ free_xml(local_cib);
+ local_cib = NULL;
+ break;
+ default:
+ crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
+ free_xml(local_cib);
+ local_cib = NULL;
+ }
+ }
+
+ if (local_cib == NULL) {
+ crm_trace("Re-requesting full CIB");
+ rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
+ if(rc != pcmk_ok) {
+ crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
+ return;
+ }
+ CRM_ASSERT(local_cib != NULL);
+ need_full_refresh = true;
+ }
+
+ pcmk__refresh_node_caches_from_cib(local_cib);
+ update_stonith_watchdog_timeout_ms(local_cib);
+
+ if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
+ need_full_refresh = true;
+ }
+
+ if (need_full_refresh) {
+ fencing_topology_init();
+ cib_devices_update();
+ } else {
+ // Partial refresh
+ update_fencing_topology(event, msg);
+ update_cib_stonith_devices(event, msg);
+ }
+
+ watchdog_device_update();
+}
+
+static void
+init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ crm_info("Updating device list from CIB");
+ have_cib_devices = TRUE;
+ local_cib = copy_xml(output);
+
+ pcmk__refresh_node_caches_from_cib(local_cib);
+ update_stonith_watchdog_timeout_ms(local_cib);
+
+ fencing_topology_init();
+ cib_devices_update();
+ watchdog_device_update();
+}
+
+static void
+stonith_shutdown(int nsig)
+{
+ crm_info("Terminating with %d clients", pcmk__ipc_client_count());
+ stonith_shutdown_flag = TRUE;
+ if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
+ g_main_loop_quit(mainloop);
+ }
+}
+
+static void
+cib_connection_destroy(gpointer user_data)
+{
+ if (stonith_shutdown_flag) {
+ crm_info("Connection to the CIB manager closed");
+ return;
+ } else {
+ crm_crit("Lost connection to the CIB manager, shutting down");
+ }
+ if (cib_api) {
+ cib_api->cmds->signoff(cib_api);
+ }
+ stonith_shutdown(0);
+}
+
+static void
+stonith_cleanup(void)
+{
+ if (cib_api) {
+ cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb);
+ cib_api->cmds->signoff(cib_api);
+ }
+
+ if (ipcs) {
+ qb_ipcs_destroy(ipcs);
+ }
+
+ crm_peer_destroy();
+ pcmk__client_cleanup();
+ free_stonith_remote_op_list();
+ free_topology_list();
+ free_device_list();
+ free_metadata_cache();
+ fenced_unregister_handlers();
+
+ free(stonith_our_uname);
+ stonith_our_uname = NULL;
+
+ free_xml(local_cib);
+ local_cib = NULL;
+}
+
+static gboolean
+stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
+ GError **error)
+{
+ stand_alone = FALSE;
+ options.no_cib_connect = true;
+ return TRUE;
+}
+
+static void
+setup_cib(void)
+{
+ int rc, retries = 0;
+
+ cib_api = cib_new();
+ if (cib_api == NULL) {
+ crm_err("No connection to the CIB manager");
+ return;
+ }
+
+ do {
+ sleep(retries);
+ rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
+ } while (rc == -ENOTCONN && ++retries < 5);
+
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
+
+ } else if (pcmk_ok !=
+ cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
+ crm_err("Could not set CIB notification callback");
+
+ } else {
+ rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
+ cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
+ init_cib_cache_cb);
+ cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
+ crm_info("Watching for fencing topology changes");
+ }
+}
+
+struct qb_ipcs_service_handlers ipc_callbacks = {
+ .connection_accept = st_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = st_ipc_dispatch,
+ .connection_closed = st_ipc_closed,
+ .connection_destroyed = st_ipc_destroy
+};
+
+/*!
+ * \internal
+ * \brief Callback for peer status changes
+ *
+ * \param[in] type What changed
+ * \param[in] node What peer had the change
+ * \param[in] data Previous value of what changed
+ */
+static void
+st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+{
+ if ((type != crm_status_processes)
+ && !pcmk_is_set(node->flags, crm_remote_node)) {
+ /*
+ * This is a hack until we can send to a nodeid and/or we fix node name lookups
+ * These messages are ignored in stonith_peer_callback()
+ */
+ xmlNode *query = create_xml_node(NULL, "stonith_command");
+
+ crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
+ crm_xml_add(query, F_TYPE, T_STONITH_NG);
+ crm_xml_add(query, F_STONITH_OPERATION, "poke");
+
+ crm_debug("Broadcasting our uname because of node %u", node->id);
+ send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
+
+ free_xml(query);
+ }
+}
+
+static pcmk__cluster_option_t fencer_options[] = {
+ /* name, old name, type, allowed values,
+ * default value, validator,
+ * short description,
+ * long description
+ */
+ {
+ PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL,
+ N_("Advanced use only: An alternate parameter to supply instead of 'port'"),
+        N_("Some devices do not support the "
+ "standard 'port' parameter or may provide additional ones. Use "
+ "this to specify an alternate, device-specific, parameter "
+ "that should indicate the machine to be fenced. A value of "
+ "none can be used to tell the cluster not to supply any "
+ "additional parameters.")
+ },
+ {
+ PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL,
+        N_("A mapping of host names to port numbers for devices that do not support host names."),
+        N_("For example, node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2")
+ },
+ {
+ PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL,
+        N_("A list of machines controlled by this device "
+           "(optional unless pcmk_host_check=static-list)"),
+        N_("For example, node1,node2,node3")
+ },
+ {
+ PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL,
+ N_("How to determine which machines are controlled by the device."),
+ N_("Allowed values: dynamic-list "
+ "(query the device via the 'list' command), static-list "
+ "(check the pcmk_host_list attribute), status "
+ "(query the device via the 'status' command), "
+ "none (assume every device can fence every "
+ "machine)")
+ },
+ {
+ PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL,
+        N_("Enable a delay of no more than the time specified before executing fencing actions."),
+ N_("Enable a delay of no more than the "
+ "time specified before executing fencing actions. Pacemaker "
+ "derives the overall delay by taking the value of "
+ "pcmk_delay_base and adding a random delay value such "
+ "that the sum is kept below this maximum.")
+ },
+ {
+ PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL,
+ N_("Enable a base delay for "
+ "fencing actions and specify base delay value."),
+ N_("This enables a static delay for "
+ "fencing actions, which can help avoid \"death matches\" where "
+ "two nodes try to fence each other at the same time. If "
+ "pcmk_delay_max is also used, a random delay will be "
+           "added such that the total delay is kept below that value. "
+ "This can be set to a single time value to apply to any node "
+ "targeted by this device (useful if a separate device is "
+ "configured for each target), or to a node map (for example, "
+ "\"node1:1s;node2:5\") to set a different value per target.")
+ },
+ {
+ PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL,
+        N_("The maximum number of actions that can be performed in parallel on this device"),
+        N_("Cluster property concurrent-fencing=true needs to be configured first. "
+           "Then use this to specify the maximum number of actions that can be performed in parallel on this device. -1 is unlimited.")
+ },
+ {
+ "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL,
+ N_("Advanced use only: An alternate command to run instead of 'reboot'"),
+ N_("Some devices do not support the standard commands or may provide additional ones.\n"
+ "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.")
+ },
+ {
+ "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"),
+        N_("Some devices need much more/less time to complete than normal. "
+ "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.")
+ },
+ {
+ "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.")
+ },
+ {
+ "pcmk_off_action",NULL, "string", NULL, "off", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'off\'"),
+        N_("Some devices do not support the standard commands or may provide additional ones. "
+ "Use this to specify an alternate, device-specific, command that implements the \'off\' action.")
+ },
+ {
+ "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"),
+        N_("Some devices need much more/less time to complete than normal. "
+ "Use this to specify an alternate, device-specific, timeout for \'off\' actions.")
+ },
+ {
+ "pcmk_off_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.")
+ },
+ {
+ "pcmk_on_action",NULL, "string", NULL, "on", NULL,
+ N_("Advanced use only: An alternate command to run instead of 'on'"),
+        N_("Some devices do not support the standard commands or may provide additional ones. "
+ "Use this to specify an alternate, device-specific, command that implements the \'on\' action.")
+ },
+ {
+ "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"),
+        N_("Some devices need much more/less time to complete than normal. "
+ "Use this to specify an alternate, device-specific, timeout for \'on\' actions.")
+ },
+ {
+ "pcmk_on_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.")
+ },
+ {
+ "pcmk_list_action",NULL, "string", NULL, "list", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'list\'"),
+        N_("Some devices do not support the standard commands or may provide additional ones. "
+ "Use this to specify an alternate, device-specific, command that implements the \'list\' action.")
+ },
+ {
+ "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"),
+        N_("Some devices need much more/less time to complete than normal. "
+ "Use this to specify an alternate, device-specific, timeout for \'list\' actions.")
+ },
+ {
+ "pcmk_list_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.")
+ },
+ {
+ "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'monitor\'"),
+        N_("Some devices do not support the standard commands or may provide additional ones. "
+ "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.")
+ },
+ {
+ "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal.\n"
+ "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.")
+ },
+ {
+ "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.")
+ },
+ {
+ "pcmk_status_action",NULL, "string", NULL, "status", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'status\'"),
+        N_("Some devices do not support the standard commands or may provide additional ones. "
+ "Use this to specify an alternate, device-specific, command that implements the \'status\' action.")
+ },
+ {
+ "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"),
+        N_("Some devices need much more/less time to complete than normal. "
+ "Use this to specify an alternate, device-specific, timeout for \'status\' actions.")
+ },
+ {
+ "pcmk_status_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.")
+ },
+};
+
+void
+fencer_metadata(void)
+{
+ const char *desc_short = N_("Instance attributes available for all "
+ "\"stonith\"-class resources");
+ const char *desc_long = N_("Instance attributes available for all \"stonith\"-"
+ "class resources and used by Pacemaker's fence "
+ "daemon, formerly known as stonithd");
+
+ gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short,
+ desc_long, fencer_options,
+ PCMK__NELEM(fencer_options));
+ printf("%s", s);
+ g_free(s);
+}
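+
+/* A usage sketch: the metadata above is printed when the daemon is invoked
+ * with "metadata" as its first positional argument (handled in main() below):
+ *
+ *   # pacemaker-fenced metadata
+ */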
+
+static GOptionEntry entries[] = {
+ { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
+ "Deprecated (will be removed in a future release)", NULL },
+
+ { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
+ stand_alone_cpg_cb, "Intended for use in regression testing only", NULL },
+
+ { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
+ &options.log_files, "Send logs to the additional named logfile", NULL },
+
+ { NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_cluster_t *cluster = NULL;
+ crm_ipc_t *old_instance = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ if ((g_strv_length(processed_args) >= 2)
+ && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
+ fencer_metadata();
+ goto done;
+ }
+
+ // Open additional log files
+ pcmk__add_logfiles(options.log_files, out);
+
+ crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
+ (args->verbosity > 0), argc, argv, FALSE);
+
+ crm_notice("Starting Pacemaker fencer");
+
+ old_instance = crm_ipc_new("stonith-ng", 0);
+ if (old_instance == NULL) {
+ /* crm_ipc_new() will have already logged an error message with
+ * crm_err()
+ */
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (crm_ipc_connect(old_instance)) {
+ // IPC endpoint already up
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("pacemaker-fenced is already active, aborting startup");
+ goto done;
+ } else {
+ // Not up or not authentic, we'll proceed either way
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
+ mainloop_add_signal(SIGTERM, stonith_shutdown);
+
+ crm_peer_init();
+
+ fenced_data_set = pe_new_working_set();
+ CRM_ASSERT(fenced_data_set != NULL);
+
+ cluster = pcmk_cluster_new();
+
+ /* Initialize the logger prior to setup_cib(). update_cib_cache_cb() may
+ * call the "xml-patchset" message function, which needs the logger, after
+ * setup_cib() has run.
+ */
+    rc = pcmk__log_output_new(&logger_out);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format log: %s", pcmk_rc_str(rc));
+ goto done;
+ }
+ pe__register_messages(logger_out);
+ pcmk__register_lib_messages(logger_out);
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+ fenced_data_set->priv = logger_out;
+
+ if (!stand_alone) {
+#if SUPPORT_COROSYNC
+ if (is_corosync_cluster()) {
+ cluster->destroy = stonith_peer_cs_destroy;
+ cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback;
+ cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+ }
+#endif // SUPPORT_COROSYNC
+
+ crm_set_status_callback(&st_peer_update_callback);
+
+ if (crm_cluster_connect(cluster) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_crit("Cannot sign in to the cluster... terminating");
+ goto done;
+ }
+ pcmk__str_update(&stonith_our_uname, cluster->uname);
+
+ if (!options.no_cib_connect) {
+ setup_cib();
+ }
+
+ } else {
+ pcmk__str_update(&stonith_our_uname, "localhost");
+ crm_warn("Stand-alone mode is deprecated and will be removed "
+ "in a future release");
+ }
+
+ init_device_list();
+ init_topology_list();
+
+ pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
+
+ // Create the mainloop and run it...
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker fencer successfully started and accepting connections");
+ g_main_loop_run(mainloop);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ g_strfreev(options.log_files);
+
+ stonith_cleanup();
+ pcmk_cluster_free(cluster);
+ pe_free_working_set(fenced_data_set);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (logger_out != NULL) {
+ logger_out->finish(logger_out, exit_code, true, NULL);
+ pcmk__output_free(logger_out);
+ }
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
new file mode 100644
index 0000000..a3d2e17
--- /dev/null
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <stdint.h> // uint32_t, uint64_t
+#include <crm/common/mainloop.h>
+
+/*!
+ * \internal
+ * \brief Check whether target has already been fenced recently
+ *
+ * \param[in] tolerance Number of seconds to look back in time
+ * \param[in] target Name of node to search for
+ * \param[in] action Action we want to match
+ *
+ * \return TRUE if an equivalent fencing operation took place in the last
+ * \p tolerance seconds, FALSE otherwise
+ */
+gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
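+
+/* A minimal usage sketch (hypothetical caller and values): treat a request as
+ * already satisfied if an equivalent fencing action succeeded within the last
+ * 10 seconds:
+ *
+ *   if (stonith_check_fence_tolerance(10, "node1", "reboot")) {
+ *       return; // report success without fencing again
+ *   }
+ */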
+
+typedef struct stonith_device_s {
+ char *id;
+ char *agent;
+ char *namespace;
+
+ /*! list of actions that must execute on the target node. Used for unfencing */
+ GString *on_target_actions;
+ GList *targets;
+ time_t targets_age;
+ gboolean has_attr_map;
+
+ // Whether target's nodeid should be passed as a parameter to the agent
+ gboolean include_nodeid;
+
+    /*! Whether the cluster should automatically unfence nodes with the device */
+ gboolean automatic_unfencing;
+ guint priority;
+
+ uint32_t flags; // Group of enum st_device_flags
+
+ GHashTable *params;
+ GHashTable *aliases;
+ GList *pending_ops;
+ mainloop_timer_t *timer;
+ crm_trigger_t *work;
+ xmlNode *agent_metadata;
+
+ /*! A verified device is one that has contacted the
+ * agent successfully to perform a monitor operation */
+ gboolean verified;
+
+ gboolean cib_registered;
+ gboolean api_registered;
+ gboolean dirty;
+} stonith_device_t;
+
+/* These values are used to index certain arrays by "phase". Usually an
+ * operation has only one "phase", so phase is always zero. However, some
+ * reboots are remapped to "off" then "on", in which case "reboot" will be
+ * phase 0, "off" will be phase 1 and "on" will be phase 2.
+ */
+enum st_remap_phase {
+ st_phase_requested = 0,
+ st_phase_off = 1,
+ st_phase_on = 2,
+ st_phase_max = 3
+};
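+
+/* For example (hypothetical), state tracked per phase would be declared and
+ * indexed with these values:
+ *
+ *   guint phase_timeouts[st_phase_max];
+ *   phase_timeouts[st_phase_off] = ...; // timeout for the remapped "off"
+ */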
+
+typedef struct remote_fencing_op_s {
+    /*! The unique id associated with this operation */
+ char *id;
+ /*! The node this operation will fence */
+ char *target;
+ /*! The fencing action to perform on the target. (reboot, on, off) */
+ char *action;
+
+ /*! When was the fencing action recorded (seconds since epoch) */
+ time_t created;
+
+ /*! Marks if the final notifications have been sent to local stonith clients. */
+ gboolean notify_sent;
+ /*! The number of query replies received */
+ guint replies;
+ /*! The number of query replies expected */
+ guint replies_expected;
+ /*! Does this node own control of this operation */
+ gboolean owner;
+    /*! After the query is complete, this is the high-level timer that expires the entire operation */
+ guint op_timer_total;
+ /*! This timer expires the current fencing request. Many fencing
+ * requests may exist in a single operation */
+ guint op_timer_one;
+ /*! This timer expires the query request sent out to determine
+     * which nodes contain which devices, and whom those devices can fence */
+ guint query_timer;
+ /*! This is the default timeout to use for each fencing device if no
+ * custom timeout is received in the query. */
+ gint base_timeout;
+ /*! This is the calculated total timeout an operation can take before
+ * expiring. This is calculated by adding together all the timeout
+ * values associated with the devices this fencing operation may call */
+ gint total_timeout;
+
+ /*! Requested fencing delay.
+ * Value -1 means disable any static/random fencing delays. */
+ int delay;
+
+ /*! Delegate is the node being asked to perform a fencing action
+ * on behalf of the node that owns the remote operation. Some operations
+ * will involve multiple delegates. This value represents the final delegate
+ * that is used. */
+ char *delegate;
+ /*! The point at which the remote operation completed */
+ time_t completed;
+ //! Group of enum stonith_call_options associated with this operation
+ uint32_t call_options;
+
+    /*! The current state of the remote operation, indicating what stage
+     * the op is in: query, exec, done, duplicate, or failed. */
+ enum op_state state;
+ /*! The node that owns the remote operation */
+ char *originator;
+ /*! The local client id that initiated the fencing request */
+ char *client_id;
+ /*! The client's call_id that initiated the fencing request */
+ int client_callid;
+ /*! The name of client that initiated the fencing request */
+ char *client_name;
+ /*! List of the received query results for all the nodes in the cpg group */
+ GList *query_results;
+ /*! The original request that initiated the remote stonith operation */
+ xmlNode *request;
+
+ /*! The current topology level being executed */
+ guint level;
+ /*! The current operation phase being executed */
+ enum st_remap_phase phase;
+
+ /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
+ GList *automatic_list;
+ /*! List of all devices at the currently executing topology level */
+ GList *devices_list;
+ /*! Current entry in the topology device list */
+ GList *devices;
+
+ /*! List of duplicate operations attached to this operation. Once this operation
+ * completes, the duplicate operations will be closed out as well. */
+ GList *duplicates;
+
+    /*! The point at which the remote operation completed (nsec) */
+ long long completed_nsec;
+
+ /*! The (potentially intermediate) result of the operation */
+ pcmk__action_result_t result;
+} remote_fencing_op_t;
+
+void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);
+
+// Fencer-specific client flags
+enum st_client_flags {
+ st_callback_unknown = UINT64_C(0),
+ st_callback_notify_fence = (UINT64_C(1) << 0),
+ st_callback_device_add = (UINT64_C(1) << 2),
+ st_callback_device_del = (UINT64_C(1) << 4),
+ st_callback_notify_history = (UINT64_C(1) << 5),
+ st_callback_notify_history_synced = (UINT64_C(1) << 6)
+};
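+
+/* Illustrative sketch (an assumption): these bits are meant to be combined in
+ * a client's flag word and tested with Pacemaker's pcmk_is_set() flag helper,
+ * for example:
+ *
+ *     if (pcmk_is_set(client_flags, st_callback_notify_fence)) {
+ *         // the client asked to be notified of fencing results
+ *     }
+ *
+ * client_flags is a placeholder for wherever a client's subscription bits are
+ * stored.
+ */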
+
+// How the user specified the target of a topology level
+enum fenced_target_by {
+ fenced_target_by_unknown = -1, // Invalid or not yet parsed
+ fenced_target_by_name, // By target name
+ fenced_target_by_pattern, // By a pattern matching target names
+ fenced_target_by_attribute, // By a node attribute/value on target
+};
+
+/*
+ * Complex fencing requirements are specified via fencing topologies.
+ * A topology consists of levels; each level is a list of fencing devices.
+ * Topologies are stored in a hash table by node name. When a node needs to be
+ * fenced, if it has an entry in the topology table, the levels are tried
+ * sequentially, and the devices in each level are tried sequentially.
+ * Fencing is considered successful as soon as any level succeeds;
+ * a level is considered successful if all its devices succeed.
+ * Essentially, all devices at a given level are "and-ed" and the
+ * levels are "or-ed".
+ *
+ * This structure is used for the topology table entries.
+ * Topology levels start from 1, so levels[0] is unused and always NULL.
+ */
+typedef struct stonith_topology_s {
+ enum fenced_target_by kind; // How target was specified
+
+ /*! Node name regex or attribute name=value for which topology applies */
+ char *target;
+ char *target_value;
+ char *target_pattern;
+ char *target_attribute;
+
+ /*! Names of fencing devices at each topology level */
+ GList *levels[ST_LEVEL_MAX];
+
+} stonith_topology_t;
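+
+/* Illustrative sketch (an assumption; the real execution path is asynchronous
+ * and spread across several functions) of the "or-ed levels, and-ed devices"
+ * semantics described above:
+ *
+ *     for (int lvl = 1; lvl < ST_LEVEL_MAX; lvl++) {
+ *         bool level_ok = (tp->levels[lvl] != NULL);
+ *
+ *         for (GList *d = tp->levels[lvl]; level_ok && (d != NULL);
+ *              d = d->next) {
+ *             // fence_with_device() is a hypothetical helper
+ *             level_ok = fence_with_device((const char *) d->data);
+ *         }
+ *         if (level_ok) {
+ *             return true; // first fully successful level ends the attempt
+ *         }
+ *     }
+ *     return false; // every applicable level failed
+ */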
+
+void init_device_list(void);
+void free_device_list(void);
+void init_topology_list(void);
+void free_topology_list(void);
+void free_stonith_remote_op_list(void);
+void init_stonith_remote_op_hash_table(GHashTable **table);
+void free_metadata_cache(void);
+void fenced_unregister_handlers(void);
+
+uint64_t get_stonith_flag(const char *name);
+
+void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ xmlNode *op_request, const char *remote_peer);
+
+int stonith_device_register(xmlNode *msg, gboolean from_cib);
+
+void stonith_device_remove(const char *id, bool from_cib);
+
+char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
+void fenced_register_level(xmlNode *msg, char **desc,
+ pcmk__action_result_t *result);
+void fenced_unregister_level(xmlNode *msg, char **desc,
+ pcmk__action_result_t *result);
+
+stonith_topology_t *find_topology_for_host(const char *host);
+
+void do_local_reply(xmlNode *notify_src, pcmk__client_t *client,
+ int call_options);
+
+xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
+ const pcmk__action_result_t *result);
+
+void do_stonith_async_timeout_update(const char *client, const char *call_id,
+                                     int timeout);
+
+void fenced_send_notification(const char *type,
+ const pcmk__action_result_t *result,
+ xmlNode *data);
+void fenced_send_device_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc);
+void fenced_send_level_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc);
+
+remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
+ xmlNode *request,
+ gboolean manual_ack);
+
+void fenced_process_fencing_reply(xmlNode *msg);
+
+int process_remote_stonith_query(xmlNode *msg);
+
+void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer);
+
+void stonith_fence_history(xmlNode *msg, xmlNode **output,
+ const char *remote_peer, int options);
+
+void stonith_fence_history_trim(void);
+
+bool fencing_peer_active(crm_node_t *peer);
+
+void set_fencing_completed(remote_fencing_op_t *op);
+
+int fenced_handle_manual_confirmation(const pcmk__client_t *client,
+ xmlNode *msg);
+void fencer_metadata(void);
+
+const char *fenced_device_reboot_action(const char *device_id);
+bool fenced_device_supports_on(const char *device_id);
+
+gboolean node_has_attr(const char *node, const char *name, const char *value);
+
+gboolean node_does_watchdog_fencing(const char *node);
+
+static inline void
+fenced_set_protocol_error(pcmk__action_result_t *result)
+{
+ pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Fencer API request missing required information (bug?)");
+}
+
+/*!
+ * \internal
+ * \brief Get the device flag to use with a given action when searching devices
+ *
+ * \param[in] action Action to check
+ *
+ * \return st_device_supports_on if \p action is "on", otherwise
+ * st_device_supports_none
+ */
+static inline uint32_t
+fenced_support_flag(const char *action)
+{
+ if (pcmk__str_eq(action, "on", pcmk__str_none)) {
+ return st_device_supports_on;
+ }
+ return st_device_supports_none;
+}
+
+extern char *stonith_our_uname;
+extern gboolean stand_alone;
+extern GHashTable *device_list;
+extern GHashTable *topology;
+extern long stonith_watchdog_timeout_ms;
+extern GList *stonith_watchdog_targets;
+
+extern GHashTable *stonith_remote_op_list;
diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am
new file mode 100644
index 0000000..fc0e014
--- /dev/null
+++ b/daemons/pacemakerd/Makefile.am
@@ -0,0 +1,37 @@
+#
+# Copyright 2004-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+sbin_PROGRAMS = pacemakerd
+
+if BUILD_SYSTEMD
+systemdsystemunit_DATA = pacemaker.service
+endif
+
+EXTRA_DIST = pacemakerd.8.inc
+
+## SOURCES
+
+noinst_HEADERS = pacemakerd.h
+
+pacemakerd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemakerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la
+pacemakerd_LDADD += $(CLUSTERLIBS)
+pacemakerd_SOURCES = pacemakerd.c
+if BUILD_CS_SUPPORT
+pacemakerd_SOURCES += pcmkd_corosync.c
+endif
+pacemakerd_SOURCES += pcmkd_messages.c
+pacemakerd_SOURCES += pcmkd_subdaemons.c
+
+CLEANFILES = $(man8_MANS)
diff --git a/daemons/pacemakerd/pacemaker.combined.upstart.in b/daemons/pacemakerd/pacemaker.combined.upstart.in
new file mode 100644
index 0000000..af59ff0
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.combined.upstart.in
@@ -0,0 +1,67 @@
+# pacemaker-corosync - High-Availability cluster
+#
+# Starts Corosync cluster engine and Pacemaker cluster manager.
+
+# if you use automatic start, uncomment the line below.
+#start on started local and runlevel [2345]
+
+stop on runlevel [0123456]
+kill timeout 3600
+respawn
+
+env prog=pacemakerd
+env sysconf=@CONFIGDIR@/pacemaker
+env rpm_lockdir=@localstatedir@/lock/subsys
+env deb_lockdir=@localstatedir@/lock
+
+script
+ [ -f "$sysconf" ] && . "$sysconf"
+ exec $prog
+end script
+
+pre-start script
+ pidof corosync || start corosync
+
+ # if you use corosync-notifyd, uncomment the line below.
+ #start corosync-notifyd
+
+ # give it time to fail.
+ sleep 2
+ pidof corosync || { exit 1; }
+
+ # if you use crm_mon, uncomment the line below.
+ #start crm_mon
+end script
+
+post-start script
+ [ -f "$sysconf" ] && . "$sysconf"
+ [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+ [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+ touch "$LOCK_FILE"
+ pidof $prog > "@localstatedir@/run/$prog.pid"
+end script
+
+post-stop script
+ [ -f "$sysconf" ] && . "$sysconf"
+ [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+ [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+ rm -f "$LOCK_FILE"
+ rm -f "@localstatedir@/run/$prog.pid"
+
+ # if you use corosync-notifyd, uncomment the line below.
+ #stop corosync-notifyd || true
+
+ # if you use watchdog of corosync, uncomment the line below.
+ #pidof corosync || false
+
+ pidof pacemaker-controld || stop corosync
+
+ # if you want to reboot a machine by watchdog of corosync when
+ # pacemakerd disappeared unexpectedly, uncomment the line below
+ # and invalidate above "respawn" stanza.
+ #pidof pacemaker-controld && killall -q -9 corosync
+
+ # if you use crm_mon, uncomment the line below.
+ #stop crm_mon
+
+end script
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
new file mode 100644
index 0000000..3fd53d9
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.service.in
@@ -0,0 +1,103 @@
+[Unit]
+Description=Pacemaker High Availability Cluster Manager
+Documentation=man:pacemakerd
+Documentation=https://clusterlabs.org/pacemaker/doc/
+
+# DefaultDependencies takes care of sysinit.target,
+# basic.target, and shutdown.target
+
+# We need networking to bind to a network address. It is recommended not to
+# use Wants or Requires with network.target, and not to use
+# network-online.target for server daemons.
+After=network.target
+
+# Time syncs can make the clock jump backward, which messes with logging
+# and failure timestamps, so wait until it's done.
+After=time-sync.target
+
+# Managing systemd resources requires DBus.
+After=dbus.service
+Wants=dbus.service
+
+# Some OCF resources may have dependencies that aren't managed by the cluster;
+# these must be started before Pacemaker and stopped after it. The
+# resource-agents package provides this target, which lets system administrators
+# add drop-ins for those dependencies.
+After=resource-agents-deps.target
+Wants=resource-agents-deps.target
+
+After=syslog.service
+After=rsyslog.service
+After=corosync.service
+Requires=corosync.service
+
+# If Pacemaker respawns repeatedly, give up after this many tries in this time
+StartLimitBurst=5
+StartLimitIntervalSec=25s
+
+[Install]
+WantedBy=multi-user.target
+
+
+[Service]
+Type=simple
+KillMode=process
+NotifyAccess=main
+EnvironmentFile=-@CONFIGDIR@/pacemaker
+EnvironmentFile=-@CONFIGDIR@/sbd
+SuccessExitStatus=100
+
+ExecStart=@sbindir@/pacemakerd
+
+# Systemd v227 and above can limit the number of processes spawned by a
+# service. That is a bad idea for an HA cluster resource manager, so disable it
+# by default. The administrator can create a local override if they really want
+# a limit. If your systemd version does not support TasksMax, and you want to
+# get rid of the resulting log warnings, comment out this option.
+TasksMax=infinity
+
+# If pacemakerd doesn't stop, it's probably waiting on a cluster
+# resource. Sending -KILL will just get the node fenced
+SendSIGKILL=no
+
+# Systemd's default of respawning a failed service after 100ms is too aggressive
+RestartSec=1s
+
+# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
+# admin wants to stop the cluster while pacemakerd is not running, it
+# might be a good idea to enable the ExecStopPost directive below.
+#
+# However, the node will likely end up being fenced as a result, so it's
+# not enabled by default.
+#
+# ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd pacemaker-based \
+# pacemaker-controld pacemaker-execd pacemaker-fenced \
+# pacemaker-schedulerd
+
+# If you want Corosync to stop whenever Pacemaker is stopped,
+# uncomment the next line too:
+#
+# ExecStopPost=/bin/sh -c 'pidof pacemaker-controld || killall -TERM corosync'
+
+# Pacemaker will restart along with Corosync if Corosync is stopped while
+# Pacemaker is running.
+# In this case, if you want to be fenced always (if you do not want to restart)
+# uncomment ExecStopPost below.
+#
+# ExecStopPost=/bin/sh -c 'pidof corosync || \
+# /usr/bin/systemctl --no-block stop pacemaker'
+
+# When the service functions properly, it will wait to exit until all resources
+# have been stopped on the local node, and potentially across all nodes that
+# are shutting down. The default of 30min should cover most typical cluster
+# configurations, but it may need an increase to adapt to local conditions
+# (e.g. a large, clustered database could conceivably take longer to stop).
+TimeoutStopSec=30min
+TimeoutStartSec=60s
+
+# Restart options include: no, on-success, on-failure, on-abort or always
+Restart=on-failure
+
+# crm_perror() writes directly to stderr, so ignore it here
+# to avoid double-logging with the wrong format
+StandardError=null
diff --git a/daemons/pacemakerd/pacemaker.upstart.in b/daemons/pacemakerd/pacemaker.upstart.in
new file mode 100644
index 0000000..7a54bc0
--- /dev/null
+++ b/daemons/pacemakerd/pacemaker.upstart.in
@@ -0,0 +1,33 @@
+# pacemaker - High-Availability cluster resource manager
+#
+# Starts pacemakerd
+
+stop on runlevel [0123456]
+kill timeout 3600
+respawn
+
+env prog=pacemakerd
+env sysconf=@CONFIGDIR@/pacemaker
+env rpm_lockdir=@localstatedir@/lock/subsys
+env deb_lockdir=@localstatedir@/lock
+
+script
+ [ -f "$sysconf" ] && . "$sysconf"
+ exec $prog
+end script
+
+post-start script
+ [ -f "$sysconf" ] && . "$sysconf"
+ [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+ [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+ touch "$LOCK_FILE"
+ pidof $prog > "@localstatedir@/run/$prog.pid"
+end script
+
+post-stop script
+ [ -f "$sysconf" ] && . "$sysconf"
+ [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker"
+ [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker"
+ rm -f "$LOCK_FILE"
+ rm -f "@localstatedir@/run/$prog.pid"
+end script
diff --git a/daemons/pacemakerd/pacemakerd.8.inc b/daemons/pacemakerd/pacemakerd.8.inc
new file mode 100644
index 0000000..902af4e
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.8.inc
@@ -0,0 +1,5 @@
+[synopsis]
+pacemakerd [options]
+
+/subsidiary Pacemaker daemons/
+.SH OPTIONS
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
new file mode 100644
index 0000000..9f77ccc
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2010-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <pwd.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <crm/crm.h> /* indirectly: CRM_EX_* */
+#include <crm/msg_xml.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc_pacemakerd.h>
+#include <crm/common/output_internal.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster.h>
+
+#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons"
+
+struct {
+ gboolean features;
+ gboolean foreground;
+ gboolean shutdown;
+ gboolean standby;
+} options;
+
+static pcmk__output_t *out = NULL;
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+PCMK__OUTPUT_ARGS("features")
+static int
+pacemakerd_features(pcmk__output_t *out, va_list args) {
+ out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION,
+ BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("features")
+static int
+pacemakerd_features_xml(pcmk__output_t *out, va_list args) {
+ gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0);
+
+ pcmk__output_xml_create_parent(out, "pacemakerd",
+ "version", PACEMAKER_VERSION,
+ "build", BUILD_VERSION,
+ "feature_set", CRM_FEATURE_SET,
+ NULL);
+ out->begin_list(out, NULL, NULL, "features");
+
+ for (char **s = feature_list; *s != NULL; s++) {
+ pcmk__output_create_xml_text_node(out, "feature", *s);
+ }
+
+ out->end_list(out);
+
+ g_strfreev(feature_list);
+ return pcmk_rc_ok;
+}
+
+static pcmk__message_entry_t fmt_functions[] = {
+ { "features", "default", pacemakerd_features },
+ { "features", "xml", pacemakerd_features_xml },
+
+ { NULL, NULL, NULL }
+};
+
+static gboolean
+pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
+ return TRUE;
+}
+
+static gboolean
+standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
+ options.standby = TRUE;
+ pcmk__set_env_option("node_start_state", "standby");
+ return TRUE;
+}
+
+static GOptionEntry entries[] = {
+ { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features,
+ "Display full version and list of features Pacemaker was built with",
+ NULL },
+ { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground,
+ "(Ignored) Pacemaker always runs in the foreground",
+ NULL },
+ { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb,
+ "(Ignored) Daemon pid file location",
+ "FILE" },
+ { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown,
+      "Instruct Pacemaker to shut down on this machine",
+ NULL },
+ { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb,
+ "Start node in standby state",
+ NULL },
+
+ { NULL }
+};
+
+static void
+pcmk_ignore(int nsig)
+{
+ crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
+}
+
+static void
+pcmk_sigquit(int nsig)
+{
+ pcmk__panic(__func__);
+}
+
+static void
+mcp_chown(const char *path, uid_t uid, gid_t gid)
+{
+ int rc = chown(path, uid, gid);
+
+ if (rc < 0) {
+ crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
+ path, CRM_DAEMON_USER, gid, pcmk_rc_str(errno));
+ }
+}
+
+static void
+create_pcmk_dirs(void)
+{
+ uid_t pcmk_uid = 0;
+ gid_t pcmk_gid = 0;
+
+ const char *dirs[] = {
+ CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files
+ CRM_CORE_DIR, // core files
+ CRM_BLACKBOX_DIR, // blackbox dumps
+ PE_STATE_DIR, // scheduler inputs
+ CRM_CONFIG_DIR, // the Cluster Information Base (CIB)
+ // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it
+ NULL
+ };
+
+ if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) {
+ crm_err("Cluster user %s does not exist, aborting Pacemaker startup",
+ CRM_DAEMON_USER);
+ crm_exit(CRM_EX_NOUSER);
+ }
+
+ // Used by some resource agents
+ if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) {
+ crm_warn("Could not create directory " CRM_STATE_DIR ": %s",
+ pcmk_rc_str(errno));
+ } else {
+ mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
+ }
+
+ for (int i = 0; dirs[i] != NULL; ++i) {
+ int rc = pcmk__build_path(dirs[i], 0750);
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not create directory %s: %s",
+ dirs[i], pcmk_rc_str(rc));
+ } else {
+ mcp_chown(dirs[i], pcmk_uid, pcmk_gid);
+ }
+ }
+}
+
+static void
+remove_core_file_limit(void)
+{
+ struct rlimit cores;
+
+ // Get current limits
+ if (getrlimit(RLIMIT_CORE, &cores) < 0) {
+ crm_notice("Unable to check system core file limits "
+ "(consider ensuring the size is unlimited): %s",
+ strerror(errno));
+ return;
+ }
+
+ // Check whether core dumps are disabled
+ if (cores.rlim_max == 0) {
+ if (geteuid() != 0) { // Yes, and there's nothing we can do about it
+ crm_notice("Core dumps are disabled (consider enabling them)");
+ return;
+ }
+ cores.rlim_max = RLIM_INFINITY; // Yes, but we're root, so enable them
+ }
+
+ // Raise soft limit to hard limit (if not already done)
+ if (cores.rlim_cur != cores.rlim_max) {
+ cores.rlim_cur = cores.rlim_max;
+ if (setrlimit(RLIMIT_CORE, &cores) < 0) {
+ crm_notice("Unable to raise system limit on core file size "
+ "(consider doing so manually): %s",
+ strerror(errno));
+ return;
+ }
+ }
+
+ if (cores.rlim_cur == RLIM_INFINITY) {
+ crm_trace("Core file size is unlimited");
+ } else {
+ crm_trace("Core file size is limited to %llu bytes",
+ (unsigned long long) cores.rlim_cur);
+ }
+}
+
+static void
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
+ enum pcmk_ipc_event event_type, crm_exit_t status,
+ void *event_data, void *user_data)
+{
+ pcmk_pacemakerd_api_reply_t *reply = event_data;
+
+ switch (event_type) {
+ case pcmk_ipc_event_reply:
+ break;
+
+ default:
+ return;
+ }
+
+ if (status != CRM_EX_OK) {
+ out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status));
+ return;
+ }
+
+ if (reply->reply_type != pcmk_pacemakerd_reply_shutdown) {
+ out->err(out, "Unknown reply type %d from pacemakerd",
+ reply->reply_type);
+ }
+}
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_exit_t exit_code = CRM_EX_OK;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "p");
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ bool old_instance_connected = false;
+
+ pcmk_ipc_api_t *old_instance = NULL;
+ qb_ipcs_service_t *ipcs = NULL;
+
+ subdaemon_check_progress = time(NULL);
+
+ setenv("LC_ALL", "C", 1); // Ensure logs are in a common language
+
+ crm_log_preinit(NULL, argc, argv);
+ mainloop_add_signal(SIGHUP, pcmk_ignore);
+ mainloop_add_signal(SIGQUIT, pcmk_sigquit);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if ((rc != pcmk_rc_ok) || (out == NULL)) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ pcmk__force_args(context, &error, "%s --xml-simple-list", g_get_prgname());
+
+ pcmk__register_messages(out, fmt_functions);
+
+ if (options.features) {
+ out->message(out, "features");
+ exit_code = CRM_EX_OK;
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ pcmk__set_env_option("mcp", "true");
+
+ if (options.shutdown) {
+ pcmk__cli_init_logging("pacemakerd", args->verbosity);
+ } else {
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ }
+
+ crm_debug("Checking for existing Pacemaker instance");
+
+ rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd);
+ if (old_instance == NULL) {
+ out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc));
+ exit_code = pcmk_rc2exitc(rc);
+ goto done;
+ }
+
+ pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL);
+ rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync);
+ old_instance_connected = pcmk_ipc_is_connected(old_instance);
+
+ if (options.shutdown) {
+ if (old_instance_connected) {
+ rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name);
+ pcmk_dispatch_ipc(old_instance);
+
+ exit_code = pcmk_rc2exitc(rc);
+
+ if (exit_code != CRM_EX_OK) {
+ pcmk_free_ipc_api(old_instance);
+ goto done;
+ }
+
+ /* We get the ACK immediately, and the response right after that,
+ * but it might take a while for pacemakerd to get around to
+ * shutting down. Wait for that to happen (with 30-minute timeout).
+ */
+ for (int i = 0; i < 900; i++) {
+ if (!pcmk_ipc_is_connected(old_instance)) {
+ exit_code = CRM_EX_OK;
+ pcmk_free_ipc_api(old_instance);
+ goto done;
+ }
+
+ sleep(2);
+ }
+
+ exit_code = CRM_EX_TIMEOUT;
+ pcmk_free_ipc_api(old_instance);
+ goto done;
+
+ } else {
+ out->err(out, "Could not request shutdown "
+ "of existing Pacemaker instance: %s", pcmk_rc_str(rc));
+ pcmk_free_ipc_api(old_instance);
+ exit_code = CRM_EX_DISCONNECT;
+ goto done;
+ }
+
+ } else if (old_instance_connected) {
+ pcmk_free_ipc_api(old_instance);
+ crm_err("Aborting start-up because active Pacemaker instance found");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ pcmk_free_ipc_api(old_instance);
+
+ /* Don't allow any accidental output after this point. */
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ out = NULL;
+ }
+
+#ifdef SUPPORT_COROSYNC
+ if (mcp_read_config() == FALSE) {
+ crm_exit(CRM_EX_UNAVAILABLE);
+ }
+#endif
+
+    // OCF shell functions and cluster-glue need the facility under a different name
+ {
+ const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY);
+
+ if (!pcmk__str_eq(facility, PCMK__VALUE_NONE,
+ pcmk__str_casei|pcmk__str_null_matches)) {
+ setenv("HA_LOGFACILITY", facility, 1);
+ }
+ }
+
+ crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
+ PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
+ mainloop = g_main_loop_new(NULL, FALSE);
+
+ remove_core_file_limit();
+ create_pcmk_dirs();
+ pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks);
+
+#ifdef SUPPORT_COROSYNC
+ /* Allows us to block shutdown */
+ if (!cluster_connect_cfg()) {
+ exit_code = CRM_EX_PROTOCOL;
+ goto done;
+ }
+#endif
+
+ if (pcmk__locate_sbd() > 0) {
+ setenv("PCMK_watchdog", "true", 1);
+ running_with_sbd = TRUE;
+ } else {
+ setenv("PCMK_watchdog", "false", 1);
+ }
+
+ switch (find_and_track_existing_processes()) {
+ case pcmk_rc_ok:
+ break;
+ case pcmk_rc_ipc_unauthorized:
+ exit_code = CRM_EX_CANTCREAT;
+ goto done;
+ default:
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ };
+
+ mainloop_add_signal(SIGTERM, pcmk_shutdown);
+ mainloop_add_signal(SIGINT, pcmk_shutdown);
+
+ if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
+ crm_notice("Waiting for startup-trigger from SBD.");
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
+ startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
+ } else {
+ if (running_with_sbd) {
+ crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
+                     "by your SBD version) improve the reliability of "
+                     "interworking between SBD and Pacemaker.");
+ }
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+ init_children_processes(NULL);
+ }
+
+ crm_notice("Pacemaker daemon successfully started and accepting connections");
+ g_main_loop_run(mainloop);
+
+ if (ipcs) {
+ crm_trace("Closing IPC server");
+ mainloop_del_ipc_server(ipcs);
+ ipcs = NULL;
+ }
+
+ g_main_loop_unref(mainloop);
+#ifdef SUPPORT_COROSYNC
+ cluster_disconnect_cfg();
+#endif
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
new file mode 100644
index 0000000..b2a6864
--- /dev/null
+++ b/daemons/pacemakerd/pacemakerd.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdint.h>
+
+#define MAX_RESPAWN 100
+
+extern GMainLoop *mainloop;
+extern struct qb_ipcs_service_handlers mcp_ipc_callbacks;
+extern const char *pacemakerd_state;
+extern gboolean running_with_sbd;
+extern unsigned int shutdown_complete_state_reported_to;
+extern gboolean shutdown_complete_state_reported_client_closed;
+extern crm_trigger_t *shutdown_trigger;
+extern crm_trigger_t *startup_trigger;
+extern time_t subdaemon_check_progress;
+
+gboolean mcp_read_config(void);
+
+gboolean cluster_connect_cfg(void);
+void cluster_disconnect_cfg(void);
+int find_and_track_existing_processes(void);
+gboolean init_children_processes(void *user_data);
+void restart_cluster_subdaemons(void);
+void pcmk_shutdown(int nsig);
+void pcmkd_shutdown_corosync(void);
+bool pcmkd_corosync_connected(void);
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
new file mode 100644
index 0000000..2648756
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_corosync.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <sys/utsname.h>
+#include <sys/stat.h> /* for calls to stat() */
+#include <libgen.h> /* For basename() and dirname() */
+
+#include <sys/types.h>
+#include <pwd.h> /* For getpwname() */
+
+#include <corosync/hdb.h>
+#include <corosync/cfg.h>
+#include <corosync/cpg.h>
+#include <corosync/cmap.h>
+
+#include <crm/cluster/internal.h>
+#include <crm/common/ipc.h> /* for crm_ipc_is_authentic_process */
+#include <crm/common/mainloop.h>
+
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
+
+static corosync_cfg_handle_t cfg_handle = 0;
+static mainloop_timer_t *reconnect_timer = NULL;
+
+/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
+
+static void
+cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
+{
+ crm_info("Corosync wants to shut down: %s",
+ (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
+ (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
+
+ /* Never allow corosync to shut down while we're running */
+ corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
+}
+
+static corosync_cfg_callbacks_t cfg_callbacks = {
+ .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
+};
+
+static int
+pcmk_cfg_dispatch(gpointer user_data)
+{
+ corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
+ cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
+
+ if (rc != CS_OK) {
+ return -1;
+ }
+ return 0;
+}
+
+static void
+close_cfg(void)
+{
+ if (cfg_handle != 0) {
+#ifdef HAVE_COROSYNC_CFG_TRACKSTART
+ /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
+ * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
+ * it's not necessary since we exit immediately after this.
+ */
+#endif
+ corosync_cfg_finalize(cfg_handle);
+ cfg_handle = 0;
+ }
+}
+
+static gboolean
+cluster_reconnect_cb(gpointer data)
+{
+ if (cluster_connect_cfg()) {
+ mainloop_timer_del(reconnect_timer);
+ reconnect_timer = NULL;
+ crm_notice("Cluster reconnect succeeded");
+ mcp_read_config();
+ restart_cluster_subdaemons();
+ return G_SOURCE_REMOVE;
+ } else {
+ crm_info("Cluster reconnect failed "
+ "(connection will be reattempted once per second)");
+ }
+ /*
+     * In theory this will continue forever. In practice, the CIB connection
+     * from attrd will time out and shut down Pacemaker when it gets bored.
+ */
+ return G_SOURCE_CONTINUE;
+}
+
+
+static void
+cfg_connection_destroy(gpointer user_data)
+{
+ crm_warn("Lost connection to cluster layer "
+ "(connection will be reattempted once per second)");
+ corosync_cfg_finalize(cfg_handle);
+ cfg_handle = 0;
+ reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
+ mainloop_timer_start(reconnect_timer);
+}
+
+void
+cluster_disconnect_cfg(void)
+{
+ close_cfg();
+ if (reconnect_timer != NULL) {
+ /* The mainloop should be gone by this point, so this isn't necessary,
+ * but cleaning up memory should make valgrind happier.
+ */
+ mainloop_timer_del(reconnect_timer);
+ reconnect_timer = NULL;
+ }
+}
+
+#define cs_repeat(counter, max, code) do { \
+ code; \
+ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
+ counter++; \
+ crm_debug("Retrying Corosync operation after %ds", counter); \
+ sleep(counter); \
+ } else { \
+ break; \
+ } \
+ } while(counter < max)
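+
+/* Example use (a sketch; "handle" and "callbacks" are placeholder names, but
+ * this matches the calls below). Because the macro sleeps for "counter"
+ * seconds after each retriable failure, the total wait grows roughly
+ * quadratically with the retry count:
+ *
+ *     int retries = 0;
+ *     cs_error_t rc;
+ *
+ *     cs_repeat(retries, 30,
+ *               rc = corosync_cfg_initialize(&handle, &callbacks));
+ */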
+
+gboolean
+cluster_connect_cfg(void)
+{
+ cs_error_t rc;
+ int fd = -1, retries = 0, rv;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ uint32_t nodeid;
+
+ static struct mainloop_fd_callbacks cfg_fd_callbacks = {
+ .dispatch = pcmk_cfg_dispatch,
+ .destroy = cfg_connection_destroy,
+ };
+
+ cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
+
+ if (rc != CS_OK) {
+ crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
+ cs_strerror(rc), rc);
+ return FALSE;
+ }
+
+ rc = corosync_cfg_fd_get(cfg_handle, &fd);
+ if (rc != CS_OK) {
+ crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+    /* Is the CFG provider running as root (in the given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_crit("Rejecting Corosync CFG provider because process %lld "
+ "is running as uid %lld gid %lld, not root",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_crit("Could not authenticate Corosync CFG provider: %s "
+ CRM_XS " rc=%d", strerror(-rv), -rv);
+ goto bail;
+ }
+
+ retries = 0;
+ cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
+ if (rc != CS_OK) {
+ crm_crit("Could not get local node ID from Corosync: %s "
+ CRM_XS " rc=%d", cs_strerror(rc), rc);
+ goto bail;
+ }
+ crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
+
+#ifdef HAVE_COROSYNC_CFG_TRACKSTART
+ retries = 0;
+ cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
+ if (rc != CS_OK) {
+ crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+#endif
+
+ mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
+ return TRUE;
+
+ bail:
+ corosync_cfg_finalize(cfg_handle);
+ return FALSE;
+}
+
+void
+pcmkd_shutdown_corosync(void)
+{
+ cs_error_t rc;
+
+ if (cfg_handle == 0) {
+ crm_warn("Unable to shut down Corosync: No connection");
+ return;
+ }
+ crm_info("Asking Corosync to shut down");
+ rc = corosync_cfg_try_shutdown(cfg_handle,
+ COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+ if (rc == CS_OK) {
+ close_cfg();
+ } else {
+ crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
+ cs_strerror(rc), rc);
+ }
+}
+
+bool
+pcmkd_corosync_connected(void)
+{
+ cpg_handle_t local_handle = 0;
+ cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
+ int fd = -1;
+
+ if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
+ return false;
+ }
+
+ if (cpg_fd_get(local_handle, &fd) != CS_OK) {
+ return false;
+ }
+
+ cpg_finalize(local_handle);
+
+ return true;
+}
+
+/* =::=::=::= Configuration =::=::=::= */
+static int
+get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
+ const char *fallback)
+{
+ int rc = 0, retries = 0;
+
+ cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
+ if (rc != CS_OK) {
+ crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
+ pcmk__str_update(value, fallback);
+ }
+ crm_trace("%s: %s", key, *value);
+ return rc;
+}
+
+gboolean
+mcp_read_config(void)
+{
+ cs_error_t rc = CS_OK;
+ int retries = 0;
+ cmap_handle_t local_handle;
+ uint64_t config = 0;
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
+ enum cluster_type_e stack;
+
+ // There can be only one possibility
+ do {
+ rc = pcmk__init_cmap(&local_handle);
+ if (rc != CS_OK) {
+ retries++;
+ crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
+ CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
+ sleep(retries);
+
+ } else {
+ break;
+ }
+
+ } while (retries < 5);
+
+ if (rc != CS_OK) {
+ crm_crit("Could not connect to Corosync CMAP: %s "
+ CRM_XS " rc=%d", cs_strerror(rc), rc);
+ return FALSE;
+ }
+
+ rc = cmap_fd_get(local_handle, &fd);
+ if (rc != CS_OK) {
+ crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
+ cs_strerror(rc), rc);
+ cmap_finalize(local_handle);
+ return FALSE;
+ }
+
+    /* Is the CMAP provider running as root (in the given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_crit("Rejecting Corosync CMAP provider because process %lld "
+ "is running as uid %lld gid %lld, not root",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ cmap_finalize(local_handle);
+ return FALSE;
+ } else if (rv < 0) {
+ crm_crit("Could not authenticate Corosync CMAP provider: %s "
+ CRM_XS " rc=%d", strerror(-rv), -rv);
+ cmap_finalize(local_handle);
+ return FALSE;
+ }
+
+ stack = get_cluster_type();
+ if (stack != pcmk_cluster_corosync) {
+ crm_crit("Expected Corosync cluster layer but detected %s "
+ CRM_XS " stack=%d", name_for_cluster_type(stack), stack);
+ return FALSE;
+ }
+
+ crm_info("Reading configuration for %s stack",
+ name_for_cluster_type(stack));
+ pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync");
+ pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync");
+
+ // If debug logging is not configured, check whether corosync has it
+ if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
+ char *debug_enabled = NULL;
+
+ get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
+
+ if (crm_is_true(debug_enabled)) {
+ pcmk__set_env_option(PCMK__ENV_DEBUG, "1");
+ if (get_crm_log_level() < LOG_DEBUG) {
+ set_crm_log_level(LOG_DEBUG);
+ }
+
+ } else {
+ pcmk__set_env_option(PCMK__ENV_DEBUG, "0");
+ }
+
+ free(debug_enabled);
+ }
+
+ if(local_handle){
+ gid_t gid = 0;
+ if (pcmk_daemon_user(NULL, &gid) < 0) {
+ crm_warn("Could not authorize group with Corosync " CRM_XS
+ " No group found for user %s", CRM_DAEMON_USER);
+
+ } else {
+ char key[PATH_MAX];
+ snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
+ rc = cmap_set_uint8(local_handle, key, 1);
+ if (rc != CS_OK) {
+ crm_warn("Could not authorize group with Corosync: %s " CRM_XS
+ " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc);
+ }
+ }
+ }
+ cmap_finalize(local_handle);
+
+ return TRUE;
+}
diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c
new file mode 100644
index 0000000..7ed9899
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_messages.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+
+static GHashTable *pcmkd_handlers = NULL;
+
+static xmlNode *
+handle_node_cache_request(pcmk__request_t *request)
+{
+ crm_trace("Ignoring request from client %s to purge node "
+ "because peer cache is not used",
+ pcmk__client_name(request->ipc_client));
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_OK);
+ return NULL;
+}
+
+static xmlNode *
+handle_ping_request(pcmk__request_t *request)
+{
+ xmlNode *msg = request->xml;
+
+ const char *value = NULL;
+ xmlNode *ping = NULL;
+ xmlNode *reply = NULL;
+ const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+ /* Pinged for status */
+ crm_trace("Pinged from " F_CRM_SYS_FROM "='%s' " F_CRM_ORIGIN "='%s'",
+ pcmk__s(from, ""),
+ pcmk__s(crm_element_value(msg, F_CRM_ORIGIN), ""));
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
+
+ ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+ crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
+ crm_xml_add_ll(ping, XML_ATTR_TSTAMP,
+ (long long) subdaemon_check_progress);
+ crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+ reply = create_reply(msg, ping);
+
+ free_xml(ping);
+
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Failed building ping reply for client %s",
+ pcmk__client_name(request->ipc_client));
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+
+    /* Only advance the state machine when it is sbd that pinged us */
+ if (from && strstr(from, "sbd")) {
+ if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) {
+ if (pcmk__get_sbd_sync_resource_startup()) {
+ crm_notice("Shutdown-complete-state passed to SBD.");
+ }
+
+ shutdown_complete_state_reported_to = request->ipc_client->pid;
+
+ } else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) {
+ crm_notice("Received startup-trigger from SBD.");
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+ mainloop_set_trigger(startup_trigger);
+ }
+ }
+
+ return reply;
+}
+
+static xmlNode *
+handle_shutdown_request(pcmk__request_t *request)
+{
+ xmlNode *msg = request->xml;
+
+ xmlNode *shutdown = NULL;
+ xmlNode *reply = NULL;
+
+ /* Only allow privileged users (i.e. root or hacluster) to shut down
+ * Pacemaker from the command line (or direct IPC), so that other users
+ * are forced to go through the CIB and have ACLs applied.
+ */
+ bool allowed = pcmk_is_set(request->ipc_client->flags, pcmk__client_privileged);
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
+
+ shutdown = create_xml_node(NULL, XML_CIB_ATTR_SHUTDOWN);
+
+ if (allowed) {
+ crm_notice("Shutting down in response to IPC request %s from %s",
+ crm_element_value(msg, F_CRM_REFERENCE),
+ crm_element_value(msg, F_CRM_ORIGIN));
+ crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_OK);
+ } else {
+ crm_warn("Ignoring shutdown request from unprivileged client %s",
+ pcmk__client_name(request->ipc_client));
+ crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_INSUFFICIENT_PRIV);
+ }
+
+ reply = create_reply(msg, shutdown);
+ free_xml(shutdown);
+
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Failed building shutdown reply for client %s",
+ pcmk__client_name(request->ipc_client));
+ } else {
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+
+ if (allowed) {
+ pcmk_shutdown(15);
+ }
+
+ return reply;
+}
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INVALID_PARAM);
+
+ pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Unknown IPC request type '%s' (bug?)",
+ pcmk__client_name(request->ipc_client));
+ return NULL;
+}
+
+static void
+pcmkd_register_handlers(void)
+{
+ pcmk__server_command_t handlers[] = {
+ { CRM_OP_RM_NODE_CACHE, handle_node_cache_request },
+ { CRM_OP_PING, handle_ping_request },
+ { CRM_OP_QUIT, handle_shutdown_request },
+ { NULL, handle_unknown_request },
+ };
+
+ pcmkd_handlers = pcmk__register_handlers(handlers);
+}
+
+static int32_t
+pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ crm_trace("Connection %p", c);
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+/* TODO: determine what the return code means */
+static int32_t
+pcmk_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+ crm_trace("Connection %p", c);
+ if (shutdown_complete_state_reported_to == client->pid) {
+ shutdown_complete_state_reported_client_closed = TRUE;
+ if (shutdown_trigger) {
+ mainloop_set_trigger(shutdown_trigger);
+ }
+ }
+ pcmk__free_client(client);
+ return 0;
+}
+
+static void
+pcmk_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p", c);
+ pcmk_ipc_closed(c);
+}
+
+/* TODO: determine what the return code means */
+static int32_t
+pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ xmlNode *msg = NULL;
+ pcmk__client_t *c = pcmk__find_client(qbc);
+
+ CRM_CHECK(c != NULL, return 0);
+
+ if (pcmkd_handlers == NULL) {
+ pcmkd_register_handlers();
+ }
+
+ msg = pcmk__client_data2xml(c, data, &id, &flags);
+ if (msg == NULL) {
+ pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+
+ } else {
+ char *log_msg = NULL;
+ const char *reason = NULL;
+ xmlNode *reply = NULL;
+
+ pcmk__request_t request = {
+ .ipc_client = c,
+ .ipc_id = id,
+ .ipc_flags = flags,
+ .peer = NULL,
+ .xml = msg,
+ .call_options = 0,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
+
+ request.op = crm_element_value_copy(request.xml, F_CRM_TASK);
+ CRM_CHECK(request.op != NULL, return 0);
+
+ reply = pcmk__process_request(&request, pcmkd_handlers);
+
+ if (reply != NULL) {
+ pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event);
+ free_xml(reply);
+ }
+
+ reason = request.result.exit_reason;
+
+ log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
+ request.op, pcmk__request_origin_type(&request),
+ pcmk__request_origin(&request),
+ pcmk_exec_status_str(request.result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+
+ if (!pcmk__result_ok(&request.result)) {
+ crm_warn("%s", log_msg);
+ } else {
+ crm_debug("%s", log_msg);
+ }
+
+ free(log_msg);
+ pcmk__reset_request(&request);
+ }
+
+ free_xml(msg);
+ return 0;
+}
+
+struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
+ .connection_accept = pcmk_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = pcmk_ipc_dispatch,
+ .connection_closed = pcmk_ipc_closed,
+ .connection_destroyed = pcmk_ipc_destroy
+};
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
new file mode 100644
index 0000000..3b08ecc
--- /dev/null
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright 2010-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include "pacemakerd.h"
+
+#include <errno.h>
+#include <grp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <crm/cluster.h>
+#include <crm/msg_xml.h>
+
+typedef struct pcmk_child_s {
+ pid_t pid;
+ int respawn_count;
+ bool respawn;
+ const char *name;
+ const char *uid;
+ const char *command;
+ const char *endpoint; /* IPC server name */
+ bool needs_cluster;
+ int check_count;
+
+ /* Anything below here will be dynamically initialized */
+ bool needs_retry;
+ bool active_before_startup;
+} pcmk_child_t;
+
+#define PCMK_PROCESS_CHECK_INTERVAL 1
+#define PCMK_PROCESS_CHECK_RETRIES 5
+#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
+
+/* Index into the array below */
+#define PCMK_CHILD_CONTROLD 5
+
+static pcmk_child_t pcmk_children[] = {
+ {
+ 0, 0, true, "pacemaker-based", CRM_DAEMON_USER,
+ CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
+ true
+ },
+ {
+ 0, 0, true, "pacemaker-fenced", NULL,
+ CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
+ true
+ },
+ {
+ 0, 0, true, "pacemaker-execd", NULL,
+ CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
+ false
+ },
+ {
+ 0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
+ CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
+ true
+ },
+ {
+ 0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
+ CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
+ false
+ },
+ {
+ 0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
+ CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
+ true
+ },
+};
+
+static char *opts_default[] = { NULL, NULL };
+static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
+
+crm_trigger_t *shutdown_trigger = NULL;
+crm_trigger_t *startup_trigger = NULL;
+time_t subdaemon_check_progress = 0;
+
+// Whether we need root group access to talk to cluster layer
+static bool need_root_group = true;
+
+/* When contacted via the pacemakerd API by a client with "sbd" in its name,
+ * we assume it is the sbd daemon, which wants to know whether pacemakerd
+ * shut down gracefully. So, once everything has shut down properly,
+ * pacemakerd waits until it has reported the graceful completion of the
+ * shutdown to sbd; only when the sbd client closes the connection can we
+ * assume the report arrived, at which point pacemakerd can finally exit.
+ * The following two variables track that handshake.
+ */
+unsigned int shutdown_complete_state_reported_to = 0;
+gboolean shutdown_complete_state_reported_client_closed = FALSE;
+
+/* state we report when asked via pacemakerd-api status-ping */
+const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
+gboolean running_with_sbd = FALSE; /* local copy */
+
+GMainLoop *mainloop = NULL;
+
+static gboolean fatal_error = FALSE;
+
+static int child_liveness(pcmk_child_t *child);
+static gboolean escalate_shutdown(gpointer data);
+static int start_child(pcmk_child_t * child);
+static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
+static void pcmk_process_exit(pcmk_child_t * child);
+static gboolean pcmk_shutdown_worker(gpointer user_data);
+static gboolean stop_child(pcmk_child_t * child, int signal);
+
+static bool
+pcmkd_cluster_connected(void)
+{
+#if SUPPORT_COROSYNC
+ return pcmkd_corosync_connected();
+#else
+ return true;
+#endif
+}
+
+static gboolean
+check_next_subdaemon(gpointer user_data)
+{
+ static int next_child = 0;
+ int rc = child_liveness(&pcmk_children[next_child]);
+
+ crm_trace("Checked %s[%lld]: %s (%d)",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid),
+ pcmk_rc_str(rc), rc);
+
+ switch (rc) {
+ case pcmk_rc_ok:
+ pcmk_children[next_child].check_count = 0;
+ subdaemon_check_progress = time(NULL);
+ break;
+ case pcmk_rc_ipc_pid_only: // This case: it was previously OK
+ pcmk_children[next_child].check_count++;
+ if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
+                crm_err("%s[%lld] is unresponsive to IPC after %d tries, but "
+                        "its PID was found, so killing it to allow a restart",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[next_child].pid),
+ pcmk_children[next_child].check_count);
+ stop_child(&pcmk_children[next_child], SIGKILL);
+ if (pcmk_children[next_child].respawn) {
+                    /* As long as the respawn limit isn't reached, give it
+                       another round of check retries
+                     */
+ pcmk_children[next_child].check_count = 0;
+ }
+ } else {
+                crm_notice("%s[%lld] is unresponsive to IPC after %d tries",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[next_child].pid),
+ pcmk_children[next_child].check_count);
+ if (pcmk_children[next_child].respawn) {
+                    /* As long as the respawn limit isn't reached and we
+                       haven't run out of connect retries, count this as
+                       progress that we are willing to report to sbd
+                     */
+ subdaemon_check_progress = time(NULL);
+ }
+ }
+ /* go to the next child and see if
+ we can make progress there
+ */
+ break;
+ case pcmk_rc_ipc_unresponsive:
+ if (!pcmk_children[next_child].respawn) {
+                /* If a subdaemon is down and we don't want it restarted, this
+                   counts as success during shutdown; if it is no longer
+                   being restarted because MAX_RESPAWN was reached, it does
+                   not.
+                 */
+ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
+ subdaemon_check_progress = time(NULL);
+ }
+ }
+ if (!pcmk_children[next_child].active_before_startup) {
+ crm_trace("found %s[%lld] missing - signal-handler "
+ "will take care of it",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[next_child].pid));
+ break;
+ }
+ if (pcmk_children[next_child].respawn) {
+ crm_err("%s[%lld] terminated",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[next_child].pid));
+ } else {
+ /* orderly shutdown */
+ crm_notice("%s[%lld] terminated",
+ pcmk_children[next_child].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[next_child].pid));
+ }
+ pcmk_process_exit(&(pcmk_children[next_child]));
+ break;
+ default:
+ crm_exit(CRM_EX_FATAL);
+ break; /* static analysis/noreturn */
+ }
+
+ next_child++;
+ if (next_child >= PCMK__NELEM(pcmk_children)) {
+ next_child = 0;
+ }
+
+ return G_SOURCE_CONTINUE;
+}
+
+static gboolean
+escalate_shutdown(gpointer data)
+{
+ pcmk_child_t *child = data;
+
+ if (child->pid == PCMK__SPECIAL_PID) {
+ pcmk_process_exit(child);
+
+ } else if (child->pid != 0) {
+ /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
+ crm_err("Child %s not terminating in a timely manner, forcing", child->name);
+ stop_child(child, SIGSEGV);
+ }
+ return FALSE;
+}
+
+static void
+pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
+{
+ pcmk_child_t *child = mainloop_child_userdata(p);
+ const char *name = mainloop_child_name(p);
+
+ if (signo) {
+ do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
+ "%s[%d] terminated with signal %d (%s)%s",
+ name, pid, signo, strsignal(signo),
+ (core? " and dumped core" : ""));
+
+ } else {
+ switch(exitcode) {
+ case CRM_EX_OK:
+ crm_info("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
+
+ case CRM_EX_FATAL:
+ crm_warn("Shutting cluster down because %s[%d] had fatal failure",
+ name, pid);
+ child->respawn = false;
+ fatal_error = TRUE;
+ pcmk_shutdown(SIGTERM);
+ break;
+
+ case CRM_EX_PANIC:
+ crm_emerg("%s[%d] instructed the machine to reset", name, pid);
+ child->respawn = false;
+ fatal_error = TRUE;
+ pcmk__panic(__func__);
+ pcmk_shutdown(SIGTERM);
+ break;
+
+ default:
+ crm_err("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
+ }
+ }
+
+ pcmk_process_exit(child);
+}
+
+static void
+pcmk_process_exit(pcmk_child_t * child)
+{
+ child->pid = 0;
+ child->active_before_startup = false;
+ child->check_count = 0;
+
+ child->respawn_count += 1;
+ if (child->respawn_count > MAX_RESPAWN) {
+        crm_err("Child %s exceeded the maximum respawn count", child->name);
+ child->respawn = false;
+ }
+
+ if (shutdown_trigger) {
+        /* Resume step-wise shutdown (returning TRUE prevents parallelizing) */
+ mainloop_set_trigger(shutdown_trigger);
+
+ } else if (!child->respawn) {
+ /* nothing to do */
+
+ } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
+ crm_err("Rebooting system because of %s", child->name);
+ pcmk__panic(__func__);
+
+ } else if (child_liveness(child) == pcmk_rc_ok) {
+        crm_warn("Suppressing respawn of child process %s this once, since it "
+                 "appears to be alive per its %s IPC endpoint",
+ child->name, child->endpoint);
+
+ } else if (child->needs_cluster && !pcmkd_cluster_connected()) {
+ crm_notice("Not respawning %s subdaemon until cluster returns",
+ child->name);
+ child->needs_retry = true;
+
+ } else {
+ crm_notice("Respawning %s subdaemon after unexpected exit",
+ child->name);
+ start_child(child);
+ }
+}
+
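+/*!
+ * \internal
+ * \brief Trigger handler that drives the phased shutdown of subdaemons
+ *
+ * Stops children one phase at a time, in reverse of their start order,
+ * returning between phases; pcmk_process_exit() re-arms the trigger as each
+ * child confirms its termination, so no two children are stopped in
+ * parallel.
+ */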
+static gboolean
+pcmk_shutdown_worker(gpointer user_data)
+{
+ static int phase = PCMK__NELEM(pcmk_children) - 1;
+ static time_t next_log = 0;
+
+ if (phase == PCMK__NELEM(pcmk_children) - 1) {
+ crm_notice("Shutting down Pacemaker");
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
+ }
+
+ for (; phase >= 0; phase--) {
+ pcmk_child_t *child = &(pcmk_children[phase]);
+
+ if (child->pid != 0) {
+ time_t now = time(NULL);
+
+ if (child->respawn) {
+ if (child->pid == PCMK__SPECIAL_PID) {
+ crm_warn("The process behind %s IPC cannot be"
+ " terminated, so either wait the graceful"
+ " period of %ld s for its native termination"
+ " if it vitally depends on some other daemons"
+ " going down in a controlled way already,"
+ " or locate and kill the correct %s process"
+ " on your own; set PCMK_fail_fast=1 to avoid"
+ " this altogether next time around",
+ child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
+ child->command);
+ }
+ next_log = now + 30;
+ child->respawn = false;
+ stop_child(child, SIGTERM);
+ if (phase < PCMK_CHILD_CONTROLD) {
+ g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
+ escalate_shutdown, child);
+ }
+
+ } else if (now >= next_log) {
+ next_log = now + 30;
+ crm_notice("Still waiting for %s to terminate "
+ CRM_XS " pid=%lld",
+ child->name, (long long) child->pid);
+ }
+ return TRUE;
+ }
+
+ /* cleanup */
+ crm_debug("%s confirmed stopped", child->name);
+ child->pid = 0;
+ }
+
+ crm_notice("Shutdown complete");
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
+ if (!fatal_error && running_with_sbd &&
+ pcmk__get_sbd_sync_resource_startup() &&
+ !shutdown_complete_state_reported_client_closed) {
+ crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
+ return TRUE;
+ }
+
+ {
+ const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
+ if(delay) {
+ sync();
+ pcmk__sleep_ms(crm_get_msec(delay));
+ }
+ }
+
+ g_main_loop_quit(mainloop);
+
+ if (fatal_error) {
+ crm_notice("Shutting down and staying down after fatal error");
+#ifdef SUPPORT_COROSYNC
+ pcmkd_shutdown_corosync();
+#endif
+ crm_exit(CRM_EX_FATAL);
+ }
+
+ return TRUE;
+}
+
+/* TODO: once libqb can handle IPC end-points carried over as bare file
+ descriptors (https://github.com/ClusterLabs/libqb/issues/325), hand those
+ descriptors over here if/once they are successfully pre-opened in
+ (presumably) child_liveness(), to close any remaining window for races */
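+// Fork and exec a subdaemon, optionally under valgrind/callgrind, after
+// dropping privileges to the child's configured user and group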
+ // \return Standard Pacemaker return code
+static int
+start_child(pcmk_child_t * child)
+{
+ uid_t uid = 0;
+ gid_t gid = 0;
+ gboolean use_valgrind = FALSE;
+ gboolean use_callgrind = FALSE;
+ const char *env_valgrind = getenv("PCMK_valgrind_enabled");
+ const char *env_callgrind = getenv("PCMK_callgrind_enabled");
+
+ child->active_before_startup = false;
+ child->check_count = 0;
+
+ if (child->command == NULL) {
+ crm_info("Nothing to do for child \"%s\"", child->name);
+ return pcmk_rc_ok;
+ }
+
+ if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
+ use_callgrind = TRUE;
+ use_valgrind = TRUE;
+
+ } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
+ use_callgrind = TRUE;
+ use_valgrind = TRUE;
+
+ } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
+ use_valgrind = TRUE;
+
+ } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
+ use_valgrind = TRUE;
+ }
+
+ if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
+ crm_warn("Cannot enable valgrind for %s:"
+ " The location of the valgrind binary is unknown", child->name);
+ use_valgrind = FALSE;
+ }
+
+ if (child->uid) {
+ if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
+ crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
+ return EACCES;
+ }
+ crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
+ }
+
+ child->pid = fork();
+ CRM_ASSERT(child->pid != -1);
+
+ if (child->pid > 0) {
+ /* parent */
+ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
+
+ crm_info("Forked child %lld for process %s%s",
+ (long long) child->pid, child->name,
+ use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
+ return pcmk_rc_ok;
+
+ } else {
+ /* Start a new session */
+ (void)setsid();
+
+ /* Setup the two alternate arg arrays */
+ opts_vgrind[0] = strdup(VALGRIND_BIN);
+ if (use_callgrind) {
+ opts_vgrind[1] = strdup("--tool=callgrind");
+ opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
+ opts_vgrind[3] = strdup(child->command);
+ opts_vgrind[4] = NULL;
+ } else {
+ opts_vgrind[1] = strdup(child->command);
+ opts_vgrind[2] = NULL;
+ opts_vgrind[3] = NULL;
+ opts_vgrind[4] = NULL;
+ }
+ opts_default[0] = strdup(child->command);
+
+ if(gid) {
+ // Drop root group access if not needed
+ if (!need_root_group && (setgid(gid) < 0)) {
+ crm_warn("Could not set group to %d: %s", gid, strerror(errno));
+ }
+
+ /* Initialize supplementary groups to only those always granted to
+ * the user, plus haclient (so we can access IPC).
+ */
+ if (initgroups(child->uid, gid) < 0) {
+ crm_err("Cannot initialize groups for %s: %s (%d)",
+ child->uid, pcmk_rc_str(errno), errno);
+ }
+ }
+
+ if (uid && setuid(uid) < 0) {
+ crm_warn("Could not set user to %s (id %d): %s",
+ child->uid, uid, strerror(errno));
+ }
+
+ pcmk__close_fds_in_child(true);
+
+ pcmk__open_devnull(O_RDONLY); // stdin (fd 0)
+ pcmk__open_devnull(O_WRONLY); // stdout (fd 1)
+ pcmk__open_devnull(O_WRONLY); // stderr (fd 2)
+
+ if (use_valgrind) {
+ (void)execvp(VALGRIND_BIN, opts_vgrind);
+ } else {
+ (void)execvp(child->command, opts_default);
+ }
+ crm_crit("Could not execute %s: %s", child->command, strerror(errno));
+ crm_exit(CRM_EX_FATAL);
+ }
+ return pcmk_rc_ok; /* never reached */
+}
+
+/*!
+ * \internal
+ * \brief Check the liveness of the child based on IPC name and PID if tracked
+ *
+ * \param[in,out] child Child tracked data
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
+ * indicating that no trace of IPC liveness was detected,
+ * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
+ * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
+ * the child is up by PID but not IPC end-point (possibly starting).
+ * \note This function doesn't modify any of \p child members but \c pid,
+ * and is not actively toying with processes as such but invoking
+ * \c stop_child in one particular case (there's for some reason
+ * a different authentic holder of the IPC end-point).
+ */
+static int
+child_liveness(pcmk_child_t *child)
+{
+ uid_t cl_uid = 0;
+ gid_t cl_gid = 0;
+ const uid_t root_uid = 0;
+ const gid_t root_gid = 0;
+ const uid_t *ref_uid;
+ const gid_t *ref_gid;
+ int rc = pcmk_rc_ipc_unresponsive;
+ pid_t ipc_pid = 0;
+
+ if (child->endpoint == NULL
+ && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
+ crm_err("Cannot track child %s for missing both API end-point and PID",
+ child->name);
+ rc = EINVAL; // Misuse of function when child is not trackable
+
+ } else if (child->endpoint != NULL) {
+ int legacy_rc = pcmk_ok;
+
+ if (child->uid == NULL) {
+ ref_uid = &root_uid;
+ ref_gid = &root_gid;
+ } else {
+ ref_uid = &cl_uid;
+ ref_gid = &cl_gid;
+ legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
+ }
+
+ if (legacy_rc < 0) {
+ rc = pcmk_legacy2rc(legacy_rc);
+ crm_err("Could not find user and group IDs for user %s: %s "
+ CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
+ } else {
+ rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
+ *ref_uid, *ref_gid,
+ &ipc_pid);
+ if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
+ if (child->pid <= 0) {
+ /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
+ * initializes a new child. If rc is
+ * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
+ * investigate further.
+ */
+ child->pid = ipc_pid;
+ } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
+ /* An unexpected (but authorized) process is responding to
+ * IPC. Investigate further.
+ */
+ rc = pcmk_rc_ipc_unresponsive;
+ }
+ }
+ }
+ }
+
+ if (rc == pcmk_rc_ipc_unresponsive) {
+ /* If we get here, a child without IPC is being tracked, no IPC liveness
+ * has been detected, or IPC liveness has been detected with an
+ * unexpected (but authorized) process. This is safe on FreeBSD, since
+ * the only possible change is from a proper child's PID to the "special"
+ * PID of 1 masking a more loosely related process.
+ */
+ int ret = pcmk__pid_active(child->pid, child->name);
+
+ if (ipc_pid && ((ret != pcmk_rc_ok)
+ || ipc_pid == PCMK__SPECIAL_PID
+ || (pcmk__pid_active(ipc_pid,
+ child->name) == pcmk_rc_ok))) {
+ /* An unexpected (but authorized) process was detected at the IPC
+ * endpoint, and either it is active, or the child we're tracking is
+ * not.
+ */
+
+ if (ret == pcmk_rc_ok) {
+ /* The child we're tracking is active. Kill it, and adopt the
+ * detected process. This assumes that our children don't fork
+ * (thus getting a different PID owning the IPC), but rather the
+ * tracking got out of sync because of some means external to
+ * Pacemaker, and adopting the detected process is better than
+ * killing it and possibly having to spawn a new child.
+ */
+ /* not possessing IPC, after all (what about corosync CPG?) */
+ stop_child(child, SIGKILL);
+ }
+ rc = pcmk_rc_ok;
+ child->pid = ipc_pid;
+ } else if (ret == pcmk_rc_ok) {
+ // Our tracked child's PID was found active, but not its IPC
+ rc = pcmk_rc_ipc_pid_only;
+ } else if ((child->pid == 0) && (ret == EINVAL)) {
+ // FreeBSD can return EINVAL
+ rc = pcmk_rc_ipc_unresponsive;
+ } else {
+ switch (ret) {
+ case EACCES:
+ rc = pcmk_rc_ipc_unauthorized;
+ break;
+ case ESRCH:
+ rc = pcmk_rc_ipc_unresponsive;
+ break;
+ default:
+ rc = ret;
+ break;
+ }
+ }
+ }
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Initial one-off check of the pre-existing "child" processes
+ *
+ * With "child" process, we mean the subdaemon that defines an API end-point
+ * (all of them do as of the comment) -- the possible complement is skipped
+ * as it is deemed it has no such shared resources to cause conflicts about,
+ * hence it can presumably be started anew without hesitation.
+ * If that won't hold true in the future, the concept of a shared resource
+ * will have to be generalized beyond the API end-point.
+ *
+ * For the boundary cases where the "child" is still starting (its IPC
+ * end-point has yet to be witnessed) or, more rarely (practically FreeBSD
+ * only), where a pre-existing "untrackable" authentic process exists, we
+ * give the situation some time to unfold in the right direction, meaning
+ * that the socket will appear or the unattainable process will disappear
+ * per the observable IPC, respectively.
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note Since this runs at the very start, the \c respawn_count fields of
+ * particular children are temporarily overloaded to track "rounds of
+ * waiting"; they are restored once we are about to finish successfully
+ * (i.e. returning \c pcmk_rc_ok) and remain unrestored otherwise. One
+ * way to suppress the liveness detection logic for a particular child
+ * is to set that value to a negative number.
+ */
+#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
+int
+find_and_track_existing_processes(void)
+{
+ bool wait_in_progress;
+ int rc;
+ size_t i, rounds;
+
+ for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
+ wait_in_progress = false;
+ for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+
+ if ((pcmk_children[i].endpoint == NULL)
+ || (pcmk_children[i].respawn_count < 0)) {
+ continue;
+ }
+
+ rc = child_liveness(&pcmk_children[i]);
+ if (rc == pcmk_rc_ipc_unresponsive) {
+ /* As a speculation, don't give up if there are more rounds to
+ * come for other reasons, but don't artificially wait just
+ * because of this, since we would preferably start ASAP.
+ */
+ continue;
+ }
+
+ pcmk_children[i].respawn_count = rounds;
+ switch (rc) {
+ case pcmk_rc_ok:
+ if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
+ if (crm_is_true(getenv("PCMK_fail_fast"))) {
+ crm_crit("Cannot reliably track pre-existing"
+ " authentic process behind %s IPC on this"
+ " platform and PCMK_fail_fast requested",
+ pcmk_children[i].endpoint);
+ return EOPNOTSUPP;
+ } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+ crm_notice("Assuming pre-existing authentic, though"
+ " on this platform untrackable, process"
+ " behind %s IPC is stable (was in %d"
+ " previous samples) so rather than"
+ " bailing out (PCMK_fail_fast not"
+ " requested), we just switch to a less"
+ " optimal IPC liveness monitoring"
+ " (not very suitable for heavy load)",
+ pcmk_children[i].name, WAIT_TRIES - 1);
+ crm_warn("The process behind %s IPC cannot be"
+ " terminated, so the overall shutdown"
+ " will get delayed implicitly (%ld s),"
+ " which serves as a graceful period for"
+ " its native termination if it vitally"
+ " depends on some other daemons going"
+ " down in a controlled way already",
+ pcmk_children[i].name,
+ (long) SHUTDOWN_ESCALATION_PERIOD);
+ } else {
+ wait_in_progress = true;
+ crm_warn("Cannot reliably track pre-existing"
+ " authentic process behind %s IPC on this"
+ " platform, can still disappear in %d"
+ " attempt(s)", pcmk_children[i].endpoint,
+ WAIT_TRIES - pcmk_children[i].respawn_count);
+ continue;
+ }
+ }
+ crm_notice("Tracking existing %s process (pid=%lld)",
+ pcmk_children[i].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid));
+ pcmk_children[i].respawn_count = -1; /* 0~keep watching */
+ pcmk_children[i].active_before_startup = true;
+ break;
+ case pcmk_rc_ipc_pid_only:
+ if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+ crm_crit("%s IPC end-point for existing authentic"
+ " process %lld did not (re)appear",
+ pcmk_children[i].endpoint,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid));
+ return rc;
+ }
+ wait_in_progress = true;
+ crm_warn("Cannot find %s IPC end-point for existing"
+ " authentic process %lld, can still (re)appear"
+ " in %d attempts (?)",
+ pcmk_children[i].endpoint,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid),
+ WAIT_TRIES - pcmk_children[i].respawn_count);
+ continue;
+ default:
+ crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
+ pcmk_children[i].name, pcmk_rc_str(rc), rc);
+ return rc;
+ }
+ }
+ if (!wait_in_progress) {
+ break;
+ }
+ pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
+ }
+ for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ pcmk_children[i].respawn_count = 0; /* restore pristine state */
+ }
+
+ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
+ NULL);
+ return pcmk_rc_ok;
+}
+
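+/*!
+ * \internal
+ * \brief Start every subdaemon that is not already being tracked
+ *
+ * Also sets PCMK_respawned in the environment, so that any child started
+ * after this point is known to be a respawn rather than part of node start.
+ */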
+gboolean
+init_children_processes(void *user_data)
+{
+ if (is_corosync_cluster()) {
+ /* Corosync clusters can drop root group access, because we set
+ * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
+ * to corosync.
+ */
+ need_root_group = false;
+ }
+
+ /* start any children that have not been detected */
+ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ if (pcmk_children[i].pid != 0) {
+ /* we are already tracking it */
+ continue;
+ }
+
+ start_child(&(pcmk_children[i]));
+ }
+
+ /* From this point on, any daemons being started will be due to
+ * respawning rather than node start.
+ *
+ * This may be useful for the daemons to know
+ */
+ setenv("PCMK_respawned", "true", 1);
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
+ return TRUE;
+}
+
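+/*!
+ * \internal
+ * \brief Signal handler entry point for shutting pacemakerd down
+ *
+ * Creates the shutdown trigger on first use, then (re)arms it so that
+ * pcmk_shutdown_worker() runs from the mainloop rather than signal context.
+ */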
+void
+pcmk_shutdown(int nsig)
+{
+ if (shutdown_trigger == NULL) {
+ shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
+ }
+ mainloop_set_trigger(shutdown_trigger);
+}
+
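+/*!
+ * \internal
+ * \brief Respawn cluster-dependent subdaemons that are awaiting a retry
+ *
+ * Children flagged needs_retry (their last respawn was deferred because the
+ * cluster layer was down) are started again, clearing the flag on success.
+ */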
+void
+restart_cluster_subdaemons(void)
+{
+ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
+ continue;
+ }
+
+ crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
+ if (start_child(&pcmk_children[i])) {
+ pcmk_children[i].needs_retry = false;
+ }
+ }
+}
+
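+/*!
+ * \internal
+ * \brief Send a signal to stop a child process, if it can be signalled
+ *
+ * \param[in,out] child   Child to stop
+ * \param[in]     signal  Signal to send (SIGTERM if 0)
+ *
+ * \return TRUE always (the kill itself may still fail; see the log)
+ */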
+static gboolean
+stop_child(pcmk_child_t * child, int signal)
+{
+ if (signal == 0) {
+ signal = SIGTERM;
+ }
+
+ /* Why skip a PID of 1?
+ - on FreeBSD, it is how an untrackable process behind IPC is masqueraded
+ - elsewhere, it designates the "init" task; notably, under systemd's
+ socket-based activation arrangement, this is a real possibility */
+ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
+ crm_debug("Nothing to do for child \"%s\" (process %lld)",
+ child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+ return TRUE;
+ }
+
+ if (child->pid <= 0) {
+ crm_trace("Client %s not running", child->name);
+ return TRUE;
+ }
+
+ errno = 0;
+ if (kill(child->pid, signal) == 0) {
+ crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
+ child->name, signal, (long long) child->pid);
+
+ } else {
+ crm_err("Could not stop %s (process %lld) with signal %d: %s",
+ child->name, (long long) child->pid, signal, strerror(errno));
+ }
+
+ return TRUE;
+}
+
diff --git a/daemons/schedulerd/Makefile.am b/daemons/schedulerd/Makefile.am
new file mode 100644
index 0000000..57e819b
--- /dev/null
+++ b/daemons/schedulerd/Makefile.am
@@ -0,0 +1,53 @@
+#
+# Copyright 2004-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
+
+halibdir = $(CRM_DAEMON_DIR)
+
+## binary progs
+
+halib_PROGRAMS = pacemaker-schedulerd
+
+if BUILD_XML_HELP
+man7_MANS = pacemaker-schedulerd.7
+endif
+
+## SOURCES
+
+noinst_HEADERS = pacemaker-schedulerd.h
+
+pacemaker_schedulerd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_schedulerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+pacemaker_schedulerd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/pengine/libpe_status.la \
+ $(top_builddir)/lib/pacemaker/libpacemaker.la
+# libcib for get_object_root()
+pacemaker_schedulerd_SOURCES = pacemaker-schedulerd.c
+pacemaker_schedulerd_SOURCES += schedulerd_messages.c
+
+install-exec-local:
+ $(INSTALL) -d -m 750 $(DESTDIR)/$(PE_STATE_DIR)
+ -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/$(PE_STATE_DIR)
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine && $(LN_S) pacemaker-schedulerd pengine
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine
+endif
+
+uninstall-local:
+ -rmdir $(DESTDIR)/$(PE_STATE_DIR)
+
+CLEANFILES = $(man7_MANS)
diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c
new file mode 100644
index 0000000..3f2a3e8
--- /dev/null
+++ b/daemons/schedulerd/pacemaker-schedulerd.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/pengine/internal.h>
+#include <pacemaker-internal.h>
+
+#include "pacemaker-schedulerd.h"
+
+#define SUMMARY "pacemaker-schedulerd - daemon for calculating a Pacemaker cluster's response to events"
+
+struct {
+ gchar **remainder;
+} options;
+
+pcmk__output_t *logger_out = NULL;
+pcmk__output_t *out = NULL;
+
+static GMainLoop *mainloop = NULL;
+static qb_ipcs_service_t *ipcs = NULL;
+static crm_exit_t exit_code = CRM_EX_OK;
+
+pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+void pengine_shutdown(int nsig);
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ GOptionEntry extra_prog_entries[] = {
+ { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder,
+ NULL,
+ NULL },
+
+ { NULL }
+ };
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+ pcmk__add_main_args(context, extra_prog_entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ GError *error = NULL;
+ int rc = pcmk_rc_ok;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+ mainloop_add_signal(SIGTERM, pengine_shutdown);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if ((rc != pcmk_rc_ok) || (out == NULL)) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ pe__register_messages(out);
+ pcmk__register_lib_messages(out);
+
+ if (options.remainder) {
+ if (g_strv_length(options.remainder) == 1 &&
+ pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) {
+ pe_metadata(out);
+ goto done;
+ } else {
+ exit_code = CRM_EX_USAGE;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Unsupported extra command line parameters");
+ goto done;
+ }
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ pcmk__cli_init_logging("pacemaker-schedulerd", args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ crm_notice("Starting Pacemaker scheduler");
+
+ if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
+ crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
+ exit_code = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "ERROR: Bad permissions on %s (see logs for details)", PE_STATE_DIR);
+ goto done;
+ }
+
+ ipcs = pcmk__serve_schedulerd_ipc(&ipc_callbacks);
+ if (ipcs == NULL) {
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Failed to create pacemaker-schedulerd server: exiting and inhibiting respawn");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (pcmk__log_output_new(&logger_out) != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+ pe__register_messages(logger_out);
+ pcmk__register_lib_messages(logger_out);
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+
+ /* Create the mainloop and run it... */
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker scheduler successfully started and accepting connections");
+ g_main_loop_run(mainloop);
+
+done:
+ g_strfreev(options.remainder);
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+ pengine_shutdown(0);
+}
+
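+/*!
+ * \internal
+ * \brief Tear down IPC and output handles, then exit with the current code
+ *
+ * Serves both as the SIGTERM handler and as the normal exit path from
+ * main().
+ */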
+void
+pengine_shutdown(int nsig)
+{
+ if (ipcs != NULL) {
+ crm_trace("Closing IPC server");
+ mainloop_del_ipc_server(ipcs);
+ ipcs = NULL;
+ }
+
+ if (logger_out != NULL) {
+ logger_out->finish(logger_out, exit_code, true, NULL);
+ pcmk__output_free(logger_out);
+ logger_out = NULL;
+ }
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ out = NULL;
+ }
+
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
diff --git a/daemons/schedulerd/pacemaker-schedulerd.h b/daemons/schedulerd/pacemaker-schedulerd.h
new file mode 100644
index 0000000..cbb07e1
--- /dev/null
+++ b/daemons/schedulerd/pacemaker-schedulerd.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__PACEMAKER_SCHEDULERD__H
+#define PCMK__PACEMAKER_SCHEDULERD__H
+
+#include <crm_internal.h>
+#include <crm/pengine/pe_types.h>
+
+extern pcmk__output_t *logger_out;
+extern pcmk__output_t *out;
+extern struct qb_ipcs_service_handlers ipc_callbacks;
+
+#endif
diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c
new file mode 100644
index 0000000..1c124d2
--- /dev/null
+++ b/daemons/schedulerd/schedulerd_messages.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "pacemaker-schedulerd.h"
+
+static GHashTable *schedulerd_handlers = NULL;
+
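+/*!
+ * \internal
+ * \brief Allocate a scheduler working set with clean global status flags
+ *
+ * Resets the crm_config_* and was_processing_* globals that the scheduler
+ * sets while processing, and attaches the trace-level logger output.
+ */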
+static pe_working_set_t *
+init_working_set(void)
+{
+ pe_working_set_t *data_set = pe_new_working_set();
+
+ CRM_ASSERT(data_set != NULL);
+
+ crm_config_error = FALSE;
+ crm_config_warning = FALSE;
+
+ was_processing_error = FALSE;
+ was_processing_warning = FALSE;
+
+ data_set->priv = logger_out;
+ return data_set;
+}
+
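+/*!
+ * \internal
+ * \brief Handle a scheduler calculation (CRM_OP_PECALC) request
+ *
+ * Acks the request, upgrades the submitted CIB to the latest schema, runs
+ * the scheduler on it, and replies with the resulting transition graph.
+ * The input is also saved to a pe-* series file, unless it is unchanged
+ * since the previous run or saving is disabled by configuration.
+ */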
+static xmlNode *
+handle_pecalc_request(pcmk__request_t *request)
+{
+ static struct series_s {
+ const char *name;
+ const char *param;
+
+ /* Maximum number of inputs of this kind to save to disk.
+ * If -1, save all; if 0, save none.
+ */
+ int wrap;
+ } series[] = {
+ { "pe-error", "pe-error-series-max", -1 },
+ { "pe-warn", "pe-warn-series-max", 5000 },
+ { "pe-input", "pe-input-series-max", 4000 },
+ };
+
+ xmlNode *msg = request->xml;
+ xmlNode *xml_data = get_message_xml(msg, F_CRM_DATA);
+
+ static char *last_digest = NULL;
+ static char *filename = NULL;
+
+ unsigned int seq;
+ int series_id = 0;
+ int series_wrap = 0;
+ char *digest = NULL;
+ const char *value = NULL;
+ time_t execution_date = time(NULL);
+ xmlNode *converted = NULL;
+ xmlNode *reply = NULL;
+ bool is_repoke = false;
+ bool process = true;
+ pe_working_set_t *data_set = init_working_set();
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
+
+ digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE,
+ CRM_FEATURE_SET);
+ converted = copy_xml(xml_data);
+ if (!cli_config_update(&converted, NULL, TRUE)) {
+ data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
+ crm_xml_add_int(data_set->graph, "transition_id", 0);
+ crm_xml_add_int(data_set->graph, "cluster-delay", 0);
+ process = false;
+ free(digest);
+
+ } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) {
+ is_repoke = true;
+ free(digest);
+
+ } else {
+ free(last_digest);
+ last_digest = digest;
+ }
+
+ if (process) {
+ pcmk__schedule_actions(converted,
+ pe_flag_no_counts
+ |pe_flag_no_compat
+ |pe_flag_show_utilization, data_set);
+ }
+
+ // Get appropriate index into series[] array
+ if (was_processing_error) {
+ series_id = 0;
+ } else if (was_processing_warning) {
+ series_id = 1;
+ } else {
+ series_id = 2;
+ }
+
+ value = pe_pref(data_set->config_hash, series[series_id].param);
+ if ((value == NULL)
+ || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) {
+ series_wrap = series[series_id].wrap;
+ }
+
+ if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name,
+ &seq) != pcmk_rc_ok) {
+ // @TODO maybe handle errors better ...
+ seq = 0;
+ }
+ crm_trace("Series %s: wrap=%d, seq=%u, pref=%s",
+ series[series_id].name, series_wrap, seq, value);
+
+ data_set->input = NULL;
+ reply = create_reply(msg, data_set->graph);
+
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Failed building ping reply for client %s",
+ pcmk__client_name(request->ipc_client));
+ goto done;
+ }
+
+ if (series_wrap == 0) { // Don't save any inputs of this kind
+ free(filename);
+ filename = NULL;
+
+ } else if (!is_repoke) { // Input changed, save to disk
+ free(filename);
+ filename = pcmk__series_filename(PE_STATE_DIR,
+ series[series_id].name, seq, true);
+ }
+
+ crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
+ crm_xml_add_int(reply, PCMK__XA_GRAPH_ERRORS, was_processing_error);
+ crm_xml_add_int(reply, PCMK__XA_GRAPH_WARNINGS, was_processing_warning);
+ crm_xml_add_int(reply, PCMK__XA_CONFIG_ERRORS, crm_config_error);
+ crm_xml_add_int(reply, PCMK__XA_CONFIG_WARNINGS, crm_config_warning);
+
+ pcmk__log_transition_summary(filename);
+
+ if (series_wrap == 0) {
+ crm_debug("Not saving input to disk (disabled by configuration)");
+
+ } else if (is_repoke) {
+ crm_info("Input has not changed since last time, not saving to disk");
+
+ } else {
+ unlink(filename);
+ crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date);
+ write_xml_file(xml_data, filename, TRUE);
+ pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name,
+ ++seq, series_wrap);
+ }
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+done:
+ free_xml(converted);
+ pe_free_working_set(data_set);
+
+ return reply;
+}
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INVALID_PARAM);
+
+ pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Unknown IPC request type '%s' (bug?)",
+ pcmk__client_name(request->ipc_client));
+ return NULL;
+}
+
+static xmlNode *
+handle_hello_request(pcmk__request_t *request)
+{
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
+
+ crm_trace("Received IPC hello from %s", pcmk__client_name(request->ipc_client));
+
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ return NULL;
+}
+
+static void
+schedulerd_register_handlers(void)
+{
+ pcmk__server_command_t handlers[] = {
+ { CRM_OP_HELLO, handle_hello_request },
+ { CRM_OP_PECALC, handle_pecalc_request },
+ { NULL, handle_unknown_request },
+ };
+
+ schedulerd_handlers = pcmk__register_handlers(handlers);
+}
+
+static int32_t
+pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ crm_trace("Connection %p", c);
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
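+/*!
+ * \internal
+ * \brief libqb dispatch callback for scheduler IPC messages
+ *
+ * Parses the client message, ignores replies and messages not addressed to
+ * CRM_SYSTEM_PENGINE, and routes everything else through the registered
+ * request handlers, sending any reply back as an IPC server event.
+ */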
+static int32_t
+pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ xmlNode *msg = NULL;
+ pcmk__client_t *c = pcmk__find_client(qbc);
+ const char *sys_to = NULL;
+
+ CRM_CHECK(c != NULL, return 0);
+
+ if (schedulerd_handlers == NULL) {
+ schedulerd_register_handlers();
+ }
+
+ msg = pcmk__client_data2xml(c, data, &id, &flags);
+ if (msg == NULL) {
+ pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+ }
+
+ sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+
+ if (pcmk__str_eq(crm_element_value(msg, F_CRM_MSG_TYPE),
+ XML_ATTR_RESPONSE, pcmk__str_none)) {
+ pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
+ crm_info("Ignoring IPC reply from %s", pcmk__client_name(c));
+
+ } else if (!pcmk__str_eq(sys_to, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
+ pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
+ crm_info("Ignoring invalid IPC message: to '%s' not "
+ CRM_SYSTEM_PENGINE, pcmk__s(sys_to, ""));
+
+ } else {
+ char *log_msg = NULL;
+ const char *reason = NULL;
+ xmlNode *reply = NULL;
+
+ pcmk__request_t request = {
+ .ipc_client = c,
+ .ipc_id = id,
+ .ipc_flags = flags,
+ .peer = NULL,
+ .xml = msg,
+ .call_options = 0,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
+
+ request.op = crm_element_value_copy(request.xml, F_CRM_TASK);
+ CRM_CHECK(request.op != NULL, return 0);
+
+ reply = pcmk__process_request(&request, schedulerd_handlers);
+
+ if (reply != NULL) {
+ pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event);
+ free_xml(reply);
+ }
+
+ reason = request.result.exit_reason;
+
+ log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
+ request.op, pcmk__request_origin_type(&request),
+ pcmk__request_origin(&request),
+ pcmk_exec_status_str(request.result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+
+ if (!pcmk__result_ok(&request.result)) {
+ crm_warn("%s", log_msg);
+ } else {
+ crm_debug("%s", log_msg);
+ }
+
+ free(log_msg);
+ pcmk__reset_request(&request);
+ }
+
+ free_xml(msg);
+ return 0;
+}
+
+/* What does the return code mean to libqb? */
+static int32_t
+pe_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+ crm_trace("Connection %p", c);
+ pcmk__free_client(client);
+ return 0;
+}
+
+static void
+pe_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p", c);
+ pe_ipc_closed(c);
+}
+
+struct qb_ipcs_service_handlers ipc_callbacks = {
+ .connection_accept = pe_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = pe_ipc_dispatch,
+ .connection_closed = pe_ipc_closed,
+ .connection_destroyed = pe_ipc_destroy
+};