Diffstat (limited to 'daemons/controld')
 daemons/controld/Makefile.am              |   87
 daemons/controld/controld_alerts.c        |   88
 daemons/controld/controld_alerts.h        |   22
 daemons/controld/controld_attrd.c         |  160
 daemons/controld/controld_callbacks.c     |  367
 daemons/controld/controld_callbacks.h     |   21
 daemons/controld/controld_cib.c           | 1138
 daemons/controld/controld_cib.h           |  125
 daemons/controld/controld_control.c       |  857
 daemons/controld/controld_corosync.c      |  164
 daemons/controld/controld_election.c      |  292
 daemons/controld/controld_execd.c         | 2433
 daemons/controld/controld_execd_state.c   |  814
 daemons/controld/controld_fencing.c       | 1108
 daemons/controld/controld_fencing.h       |   38
 daemons/controld/controld_fsa.c           |  741
 daemons/controld/controld_fsa.h           |  694
 daemons/controld/controld_globals.h       |  143
 daemons/controld/controld_join_client.c   |  366
 daemons/controld/controld_join_dc.c       |  987
 daemons/controld/controld_lrm.h           |  188
 daemons/controld/controld_matrix.c        | 1250
 daemons/controld/controld_membership.c    |  457
 daemons/controld/controld_membership.h    |   29
 daemons/controld/controld_messages.c      | 1307
 daemons/controld/controld_messages.h      |   86
 daemons/controld/controld_metadata.c      |  320
 daemons/controld/controld_metadata.h      |   96
 daemons/controld/controld_remote_ra.c     | 1440
 daemons/controld/controld_schedulerd.c    |  506
 daemons/controld/controld_te_actions.c    |  746
 daemons/controld/controld_te_callbacks.c  |  689
 daemons/controld/controld_te_events.c     |  601
 daemons/controld/controld_te_utils.c      |  367
 daemons/controld/controld_throttle.c      |  574
 daemons/controld/controld_throttle.h      |   16
 daemons/controld/controld_timers.c        |  509
 daemons/controld/controld_timers.h        |   36
 daemons/controld/controld_transition.c    |  197
 daemons/controld/controld_transition.h    |   63
 daemons/controld/controld_utils.c         |  837
 daemons/controld/controld_utils.h         |   61
 daemons/controld/pacemaker-controld.c     |  205
 daemons/controld/pacemaker-controld.h     |   39
 44 files changed, 21264 insertions(+), 0 deletions(-)
diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
new file mode 100644
index 0000000..08be1ff
--- /dev/null
+++ b/daemons/controld/Makefile.am
@@ -0,0 +1,87 @@
+#
+# Copyright 2018-2023 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-controld
+
+noinst_HEADERS = controld_alerts.h \
+ controld_callbacks.h \
+ controld_cib.h \
+ controld_fencing.h \
+ controld_fsa.h \
+ controld_globals.h \
+ controld_lrm.h \
+ controld_membership.h \
+ controld_messages.h \
+ controld_metadata.h \
+ controld_throttle.h \
+ controld_timers.h \
+ controld_transition.h \
+ controld_utils.h \
+ pacemaker-controld.h
+
+pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \
+ $(top_builddir)/lib/pacemaker/libpacemaker.la \
+ $(top_builddir)/lib/pengine/libpe_rules.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(CLUSTERLIBS)
+
+pacemaker_controld_SOURCES = pacemaker-controld.c \
+ controld_alerts.c \
+ controld_attrd.c \
+ controld_callbacks.c \
+ controld_cib.c \
+ controld_control.c \
+ controld_corosync.c \
+ controld_election.c \
+ controld_execd.c \
+ controld_execd_state.c \
+ controld_fencing.c \
+ controld_fsa.c \
+ controld_join_client.c \
+ controld_join_dc.c \
+ controld_matrix.c \
+ controld_membership.c \
+ controld_messages.c \
+ controld_metadata.c \
+ controld_remote_ra.c \
+ controld_schedulerd.c \
+ controld_te_actions.c \
+ controld_te_callbacks.c \
+ controld_te_events.c \
+ controld_te_utils.c \
+ controld_throttle.c \
+ controld_timers.c \
+ controld_transition.c \
+ controld_utils.c
+
+if BUILD_XML_HELP
+man7_MANS = pacemaker-controld.7
+endif
+
+CLEANFILES = $(man7_MANS)
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd && $(LN_S) pacemaker-controld crmd
+
+uninstall-hook:
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd
+endif
diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
new file mode 100644
index 0000000..27a5ce2
--- /dev/null
+++ b/daemons/controld/controld_alerts.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <libxml/tree.h>
+
+#include <crm/fencing/internal.h>
+#include <crm/lrmd.h>
+#include <crm/lrmd_internal.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/pengine/status.h>
+#include <crm/stonith-ng.h>
+
+#include <pacemaker-controld.h>
+
+static GList *crmd_alert_list = NULL;
+
+void
+crmd_unpack_alerts(xmlNode *alerts)
+{
+ pe_free_alert_list(crmd_alert_list);
+ crmd_alert_list = pe_unpack_alerts(alerts);
+}
+
+void
+crmd_alert_node_event(crm_node_t *node)
+{
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+ node->uname, node->id, node->state);
+}
+
+void
+crmd_alert_fencing_op(stonith_event_t * e)
+{
+ char *desc;
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ desc = stonith__event_description(e);
+ lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+ e->target, e->operation, desc, e->result);
+ free(desc);
+}
+
+void
+crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
+{
+ lrm_state_t *lrm_state;
+
+ if (crmd_alert_list == NULL) {
+ return;
+ }
+
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ return;
+ }
+
+ lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
+ op);
+}
diff --git a/daemons/controld/controld_alerts.h b/daemons/controld/controld_alerts.h
new file mode 100644
index 0000000..ec5852a
--- /dev/null
+++ b/daemons/controld/controld_alerts.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2015-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_ALERTS__H
+# define CONTROLD_ALERTS__H
+
+# include <crm/crm.h>
+# include <crm/cluster.h>
+# include <crm/stonith-ng.h>
+
+void crmd_unpack_alerts(xmlNode *alerts);
+void crmd_alert_node_event(crm_node_t *node);
+void crmd_alert_fencing_op(stonith_event_t *e);
+void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op);
+
+#endif
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
new file mode 100644
index 0000000..923abb9
--- /dev/null
+++ b/daemons/controld/controld_attrd.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2006-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/common/attrd_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_attrd_internal.h>
+#include <crm/msg_xml.h>
+
+#include <pacemaker-controld.h>
+
+static pcmk_ipc_api_t *attrd_api = NULL;
+
+void
+controld_close_attrd_ipc(void)
+{
+ if (attrd_api != NULL) {
+ crm_trace("Closing connection to pacemaker-attrd");
+ pcmk_disconnect_ipc(attrd_api);
+ pcmk_free_ipc_api(attrd_api);
+ attrd_api = NULL;
+ }
+}
+
+static inline const char *
+node_type(bool is_remote)
+{
+ return is_remote? "Pacemaker Remote" : "cluster";
+}
+
+static inline const char *
+when(void)
+{
+ return pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN)? " at shutdown" : "";
+}
+
+static void
+handle_attr_error(void)
+{
+ if (AM_I_DC) {
+ /* We are unable to provide accurate information to the
+ * scheduler, so allow another node to take over DC.
+ * @TODO Should we do this unconditionally on any failure?
+ */
+ crmd_exit(CRM_EX_FATAL);
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ // Fast-track shutdown since unable to request via attribute
+ register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+}
+
+void
+update_attrd(const char *host, const char *name, const char *value,
+ const char *user_name, gboolean is_remote_node)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ uint32_t attrd_opts = pcmk__node_attr_value;
+
+ if (is_remote_node) {
+ pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
+ }
+ rc = pcmk__attrd_api_update(attrd_api, host, name, value,
+ NULL, NULL, user_name, attrd_opts);
+ }
+ if (rc != pcmk_rc_ok) {
+ do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
+ "Could not update attribute %s=%s for %s node %s%s: %s "
+ CRM_XS " rc=%d", name, value, node_type(is_remote_node),
+ host, when(), pcmk_rc_str(rc), rc);
+ handle_attr_error();
+ }
+}
+
+void
+update_attrd_list(GList *attrs, uint32_t opts)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL,
+ opts | pcmk__node_attr_value);
+ }
+ if (rc != pcmk_rc_ok) {
+ do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
+ "Could not update multiple node attributes: %s "
+ CRM_XS " rc=%d", pcmk_rc_str(rc), rc);
+ handle_attr_error();
+ }
+}
+
+void
+update_attrd_remote_node_removed(const char *host, const char *user_name)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
+ host);
+ rc = pcmk__attrd_api_purge(attrd_api, host);
+ }
+ if (rc != pcmk_rc_ok) {
+ crm_err("Could not purge Pacemaker Remote node %s "
+ "in attribute manager%s: %s " CRM_XS " rc=%d",
+ host, when(), pcmk_rc_str(rc), rc);
+ }
+}
+
+void
+update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
+ const char *interval_spec, gboolean is_remote_node)
+{
+ int rc = pcmk_rc_ok;
+
+ if (attrd_api == NULL) {
+ rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+ }
+ if (rc == pcmk_rc_ok) {
+ const char *op_desc = pcmk__s(op, "operations");
+ const char *interval_desc = "all";
+ uint32_t attrd_opts = pcmk__node_attr_none;
+
+ if (op != NULL) {
+ interval_desc = pcmk__s(interval_spec, "nonrecurring");
+ }
+ if (is_remote_node) {
+ pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
+ }
+ crm_info("Asking attribute manager to clear failure of %s %s for %s "
+ "on %s node %s", interval_desc, op_desc, rsc,
+ node_type(is_remote_node), host);
+ rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op,
+ interval_spec, NULL, attrd_opts);
+ }
+ if (rc != pcmk_rc_ok) {
+ crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
+ CRM_XS " rc=%d", pcmk__s(rsc, "all resources"),
+ node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc);
+ }
+}
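
Every updater in this file uses the same lazy-connection idiom: create the attrd IPC API object on first use, reuse it afterwards, and funnel failures through handle_attr_error(). Below is a stripped-down sketch of that idiom in plain C, with hypothetical open_api()/api_call() helpers standing in for the real pcmk_new_ipc_api()/pcmk__attrd_api_*() calls.

    #include <stdio.h>
    #include <stddef.h>

    typedef struct { int connected; } api_t;

    static api_t *api = NULL;   /* lazily created, shared by every entry point */

    static int
    open_api(api_t **out)
    {
        static api_t singleton = { 1 };

        *out = &singleton;
        return 0;               /* 0 on success, like pcmk_rc_ok */
    }

    static int
    api_call(api_t *a, const char *request)
    {
        printf("-> %s\n", request);
        return 0;
    }

    /* Mirrors update_attrd(): connect on first use, reuse afterwards,
     * and report (or escalate) on failure
     */
    static void
    update(const char *request)
    {
        int rc = 0;

        if (api == NULL) {
            rc = open_api(&api);    /* first caller pays the connection cost */
        }
        if (rc == 0) {
            rc = api_call(api, request);
        }
        if (rc != 0) {
            fprintf(stderr, "could not send %s: rc=%d\n", request, rc);
            /* the real code escalates here via handle_attr_error() */
        }
    }

    int
    main(void)
    {
        update("name=value");    /* connects, then sends */
        update("other=value");   /* reuses the existing connection */
        return 0;
    }
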
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
new file mode 100644
index 0000000..d578adc
--- /dev/null
+++ b/daemons/controld/controld_callbacks.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+#include <crm/cib.h>
+
+#include <pacemaker-controld.h>
+
+/* From join_dc... */
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+void
+crmd_ha_msg_filter(xmlNode * msg)
+{
+ if (AM_I_DC) {
+ const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+ if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
+ const char *from = crm_element_value(msg, F_ORIG);
+
+ if (!pcmk__str_eq(from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ int level = LOG_INFO;
+ const char *op = crm_element_value(msg, F_CRM_TASK);
+
+ /* make sure the election happens NOW */
+ if (controld_globals.fsa_state != S_ELECTION) {
+ ha_msg_input_t new_input;
+
+ level = LOG_WARNING;
+ new_input.msg = msg;
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
+ __func__);
+ }
+
+ do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
+ goto done;
+ }
+ }
+
+ } else {
+ const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+
+ if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
+ return;
+ }
+ }
+
+ /* crm_log_xml_trace(msg, "HA[inbound]"); */
+ route_message(C_HA_MESSAGE, msg);
+
+ done:
+ controld_trigger_fsa();
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is online
+ *
+ * \param[in] node Node to check
+ *
+ * \retval -1 if completely dead
+ * \retval 0 if partially alive
+ * \retval 1 if completely alive
+ */
+static int
+node_alive(const crm_node_t *node)
+{
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ // Pacemaker Remote nodes can't be partially alive
+ return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1;
+
+ } else if (crm_is_peer_active(node)) {
+ // Completely up cluster node: both cluster member and peer
+ return 1;
+
+ } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
+ && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
+ // Completely down cluster node: neither cluster member nor peer
+ return -1;
+ }
+
+ // Partially up cluster node: only cluster member or only peer
+ return 0;
+}
+
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
+
+void
+peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+{
+ uint32_t old = 0;
+ bool appeared = FALSE;
+ bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
+
+ /* The controller waits to receive some information from the membership
+ * layer before declaring itself operational. If this is being called for a
+ * cluster node, indicate that we have it.
+ */
+ if (!is_remote) {
+ controld_set_fsa_input_flags(R_PEER_DATA);
+ }
+
+ if (type == crm_status_processes
+ && pcmk_is_set(node->processes, crm_get_cluster_proc())
+ && !AM_I_DC
+ && !is_remote) {
+ /*
+ * This is a hack until we can send to a nodeid and/or we fix node name lookups
+ * These messages are ignored in crmd_ha_msg_filter()
+ */
+ xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ crm_debug("Sending hello to node %u so that it learns our node name", node->id);
+ send_cluster_message(node, crm_msg_crmd, query, FALSE);
+
+ free_xml(query);
+ }
+
+ if (node->uname == NULL) {
+ return;
+ }
+
+ switch (type) {
+ case crm_status_uname:
+ /* If we've never seen the node, then it also won't be in the status section */
+ crm_info("%s node %s is now %s",
+ (is_remote? "Remote" : "Cluster"),
+ node->uname, state_text(node->state));
+ return;
+
+ case crm_status_nstate:
+ /* This callback should not be called unless the state actually
+ * changed, but here's a failsafe just in case.
+ */
+ CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
+ return);
+
+ crm_info("%s node %s is now %s (was %s)",
+ (is_remote? "Remote" : "Cluster"),
+ node->uname, state_text(node->state), state_text(data));
+
+ if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
+ appeared = TRUE;
+ if (!is_remote) {
+ remove_stonith_cleanup(node->uname);
+ }
+ } else {
+ controld_remove_failed_sync_node(node->uname);
+ controld_remove_voter(node->uname);
+ }
+
+ crmd_alert_node_event(node);
+ break;
+
+ case crm_status_processes:
+ CRM_CHECK(data != NULL, return);
+ old = *(const uint32_t *)data;
+ appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
+
+ {
+ const char *dc_s = controld_globals.dc_name;
+
+ if ((dc_s == NULL) && AM_I_DC) {
+ dc_s = "true";
+ }
+
+ crm_info("Node %s is %s a peer " CRM_XS
+ " DC=%s old=%#07x new=%#07x",
+ node->uname, (appeared? "now" : "no longer"),
+ pcmk__s(dc_s, "<none>"), old, node->processes);
+ }
+
+ if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
+ /* Peer status did not change. This should not be possible,
+ * since we don't track process flags other than peer status.
+ */
+ crm_trace("Process flag %#7x did not change from %#7x to %#7x",
+ crm_get_cluster_proc(), old, node->processes);
+ return;
+
+ }
+
+ if (!appeared) {
+ node->peer_lost = time(NULL);
+ controld_remove_failed_sync_node(node->uname);
+ controld_remove_voter(node->uname);
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_CIB_CONNECTED)) {
+ crm_trace("Ignoring peer status change because not connected to CIB");
+ return;
+
+ } else if (controld_globals.fsa_state == S_STOPPING) {
+ crm_trace("Ignoring peer status change because stopping");
+ return;
+ }
+
+ if (!appeared
+ && pcmk__str_eq(node->uname, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ /* Did we get evicted? */
+ crm_notice("Our peer connection failed");
+ register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
+
+ } else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
+ pcmk__str_casei)
+ && !crm_is_peer_active(node)) {
+ /* Did the DC leave us? */
+ crm_notice("Our peer on the DC (%s) is dead",
+ controld_globals.dc_name);
+ register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
+
+ /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
+ * want to fence it. Newer DCs will send their shutdown request
+ * to all peers, who will update the DC's expected state to
+ * down, thus avoiding fencing. We can safely erase the DC's
+ * transient attributes when it leaves in that case. However,
+ * the only way to avoid fencing older DCs is to leave the
+ * transient attributes intact until it rejoins.
+ */
+ if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
+ controld_delete_node_state(node->uname,
+ controld_section_attrs,
+ cib_scope_local);
+ }
+
+ } else if (AM_I_DC
+ || pcmk_is_set(controld_globals.flags, controld_dc_left)
+ || (controld_globals.dc_name == NULL)) {
+ /* This only needs to be done once, so normally the DC should do
+ * it. However, if there is no DC, every node must do it, since
+ * there is no other way to ensure that at least one node does it.
+ */
+ if (appeared) {
+ te_trigger_stonith_history_sync(FALSE);
+ } else {
+ controld_delete_node_state(node->uname,
+ controld_section_attrs,
+ cib_scope_local);
+ }
+ }
+ break;
+ }
+
+ if (AM_I_DC) {
+ xmlNode *update = NULL;
+ int flags = node_update_peer;
+ int alive = node_alive(node);
+ pcmk__graph_action_t *down = match_down_event(node->uuid);
+
+ crm_trace("Alive=%d, appeared=%d, down=%d",
+ alive, appeared, (down? down->id : -1));
+
+ if (appeared && (alive > 0) && !is_remote) {
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+ }
+
+ if (down) {
+ const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
+
+ if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+
+ /* tengine_stonith_callback() confirms fence actions */
+ crm_trace("Updating CIB %s fencer reported fencing of %s complete",
+ (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
+
+ } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
+
+ // Shutdown actions are immediately confirmed (i.e. no_wait)
+ if (!is_remote) {
+ flags |= node_update_join | node_update_expected;
+ crmd_peer_down(node, FALSE);
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ if (alive >= 0) {
+ crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
+ task, node->uname, down->id);
+ } else {
+ crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
+ task, node->uname, down->id);
+ pcmk__update_graph(controld_globals.transition_graph, down);
+ trigger_graph();
+ }
+
+ } else {
+ crm_trace("Node %s is %s, was expected to %s (op %d)",
+ node->uname,
+ ((alive > 0)? "alive" :
+ ((alive < 0)? "dead" : "partially alive")),
+ task, down->id);
+ }
+
+ } else if (appeared == FALSE) {
+ if ((controld_globals.transition_graph == NULL)
+ || (controld_globals.transition_graph->id == -1)) {
+ crm_info("Stonith/shutdown of node %s is unknown to the "
+ "current DC", node->uname);
+ } else {
+ crm_warn("Stonith/shutdown of node %s was not expected",
+ node->uname);
+ }
+ if (!is_remote) {
+ crm_update_peer_join(__func__, node, crm_join_none);
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
+ NULL);
+ fail_incompletable_actions(controld_globals.transition_graph,
+ node->uuid);
+
+ } else {
+ crm_trace("Node %s came up, was not expected to be down",
+ node->uname);
+ }
+
+ if (is_remote) {
+ /* A pacemaker_remote node won't have its cluster status updated
+ * in the CIB by membership-layer callbacks, so do it here.
+ */
+ flags |= node_update_cluster;
+
+ /* Trigger resource placement on newly integrated nodes */
+ if (appeared) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Pacemaker Remote node integrated", NULL);
+ }
+ }
+
+ /* Update the CIB node state */
+ update = create_node_state_update(node, flags, NULL, __func__);
+ if (update == NULL) {
+ crm_debug("Node state update not yet possible for %s", node->uname);
+ } else {
+ fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
+ }
+ free_xml(update);
+ }
+
+ controld_trigger_fsa();
+}
+
+gboolean
+crm_fsa_trigger(gpointer user_data)
+{
+ crm_trace("Invoked (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
+ s_crmd_fsa(C_FSA_INTERNAL);
+ crm_trace("Exited (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
+ return TRUE;
+}
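
crm_fsa_trigger() is installed as a GLib source callback, and its TRUE return value is what keeps the source scheduled instead of one-shot. A self-contained GLib sketch of that contract (drain_queue() is a made-up callback; g_timeout_add() and GSourceFunc are standard GLib):

    #include <glib.h>

    static int remaining = 3;

    /* Like crm_fsa_trigger(): return TRUE (G_SOURCE_CONTINUE) to stay
     * installed, FALSE (G_SOURCE_REMOVE) to detach from the main loop
     */
    static gboolean
    drain_queue(gpointer user_data)
    {
        GMainLoop *loop = user_data;

        g_print("processing, %d item(s) left\n", remaining);
        if (--remaining > 0) {
            return G_SOURCE_CONTINUE;
        }
        g_main_loop_quit(loop);
        return G_SOURCE_REMOVE;
    }

    int
    main(void)
    {
        GMainLoop *loop = g_main_loop_new(NULL, FALSE);

        g_timeout_add(100, drain_queue, loop);  /* fire every 100ms */
        g_main_loop_run(loop);
        g_main_loop_unref(loop);
        return 0;
    }
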
diff --git a/daemons/controld/controld_callbacks.h b/daemons/controld/controld_callbacks.h
new file mode 100644
index 0000000..a69d515
--- /dev/null
+++ b/daemons/controld/controld_callbacks.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_CALLBACKS__H
+# define CONTROLD_CALLBACKS__H
+
+#include <crm/cluster.h>
+
+extern void crmd_ha_msg_filter(xmlNode * msg);
+
+extern gboolean crm_fsa_trigger(gpointer user_data);
+
+extern void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data);
+
+#endif
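
For context, the declarations above are wired into the membership layer elsewhere in the controller. The registration looks roughly like the sketch below; crm_set_status_callback() is the libcrmcluster hook for peer-status callbacks, but treat the exact call site as illustrative.

    #include <crm/cluster.h>

    #include "controld_callbacks.h"

    /* Hypothetical consolidation of the controller's callback registration */
    static void
    register_membership_callbacks(void)
    {
        crm_set_status_callback(&peer_update_callback);
    }
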
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
new file mode 100644
index 0000000..94b99dd
--- /dev/null
+++ b/daemons/controld/controld_cib.c
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <unistd.h> /* sleep */
+
+#include <crm/common/alerts_internal.h>
+#include <crm/common/xml.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-controld.h>
+
+// Call ID of the most recent in-progress CIB resource update (or 0 if none)
+static int pending_rsc_update = 0;
+
+// Call IDs of requested CIB replacements that won't trigger a new election
+// (used as a set of gint values)
+static GHashTable *cib_replacements = NULL;
+
+/*!
+ * \internal
+ * \brief Store the call ID of a CIB replacement that the controller requested
+ *
+ * The \p do_cib_replaced() callback function will avoid triggering a new
+ * election when we're notified of one of these expected replacements.
+ *
+ * \param[in] call_id CIB call ID (or 0 for a synchronous call)
+ *
+ * \note This function should be called after making any asynchronous CIB
+ * request (or before making any synchronous CIB request) that may replace
+ * part of the nodes or status section. This may include CIB sync calls.
+ */
+void
+controld_record_cib_replace_call(int call_id)
+{
+ CRM_CHECK(call_id >= 0, return);
+
+ if (cib_replacements == NULL) {
+ cib_replacements = g_hash_table_new(NULL, NULL);
+ }
+
+ /* If the call ID is already present in the table, it's a stale entry. That
+ * suggests we aren't removing entries properly, and we could wrongly ignore
+ * replacement notifications if cib_t:call_id wraps around.
+ */
+ CRM_LOG_ASSERT(g_hash_table_add(cib_replacements,
+ GINT_TO_POINTER((gint) call_id)));
+}
+
+/*!
+ * \internal
+ * \brief Remove the call ID of a CIB replacement from the replacements table
+ *
+ * \param[in] call_id CIB call ID (or 0 for a synchronous call)
+ *
+ * \return \p true if \p call_id was found in the table, or \p false otherwise
+ *
+ * \note CIB notifications run before CIB callbacks. If this function is called
+ * from within a callback, \p do_cib_replaced() will have removed
+ * \p call_id from the table first if relevant changes triggered a
+ * notification.
+ */
+bool
+controld_forget_cib_replace_call(int call_id)
+{
+ CRM_CHECK(call_id >= 0, return false);
+
+ if (cib_replacements == NULL) {
+ return false;
+ }
+ return g_hash_table_remove(cib_replacements,
+ GINT_TO_POINTER((gint) call_id));
+}
+
+/*!
+ * \internal
+ * \brief Empty the hash table containing call IDs of CIB replacement requests
+ */
+void
+controld_forget_all_cib_replace_calls(void)
+{
+ if (cib_replacements != NULL) {
+ g_hash_table_remove_all(cib_replacements);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Free the hash table containing call IDs of CIB replacement requests
+ */
+void
+controld_destroy_cib_replacements_table(void)
+{
+ if (cib_replacements != NULL) {
+ g_hash_table_destroy(cib_replacements);
+ cib_replacements = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Respond to a dropped CIB connection
+ *
+ * \param[in] user_data CIB connection that dropped
+ */
+static void
+handle_cib_disconnect(gpointer user_data)
+{
+ CRM_LOG_ASSERT(user_data == controld_globals.cib_conn);
+
+ controld_trigger_fsa();
+ controld_globals.cib_conn->state = cib_disconnected;
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
+ // @TODO This should trigger a reconnect, not a shutdown
+ crm_crit("Lost connection to the CIB manager, shutting down");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_clear_fsa_input_flags(R_CIB_CONNECTED);
+
+ } else { // Expected
+ crm_info("Connection to the CIB manager terminated");
+ }
+}
+
+static void
+do_cib_updated(const char *event, xmlNode * msg)
+{
+ if (pcmk__alert_in_patchset(msg, TRUE)) {
+ controld_trigger_config();
+ }
+}
+
+static void
+do_cib_replaced(const char *event, xmlNode * msg)
+{
+ int call_id = 0;
+ const char *client_id = crm_element_value(msg, F_CIB_CLIENTID);
+ uint32_t change_section = cib_change_section_nodes
+ |cib_change_section_status;
+ long long value = 0;
+
+ crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC));
+ if (!AM_I_DC) {
+ return;
+ }
+
+ if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0)
+ && pcmk__str_eq(client_id, controld_globals.cib_client_id,
+ pcmk__str_none)
+ && controld_forget_cib_replace_call(call_id)) {
+ // We requested this replace op. No need to restart the join.
+ return;
+ }
+
+ if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0)
+ || (value < 0) || (value > UINT32_MAX)) {
+
+ crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION);
+ } else {
+ change_section = (uint32_t) value;
+ }
+
+ if (pcmk_any_flags_set(change_section, cib_change_section_nodes
+ |cib_change_section_status)) {
+
+ /* start the join process again so we get everyone's LRM status */
+ populate_cib_nodes(node_update_quick|node_update_all, __func__);
+
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ }
+}
+
+void
+controld_disconnect_cib_manager(void)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ CRM_ASSERT(cib_conn != NULL);
+
+ crm_info("Disconnecting from the CIB manager");
+
+ controld_clear_fsa_input_flags(R_CIB_CONNECTED);
+
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY,
+ do_cib_replaced);
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ do_cib_updated);
+ cib_free_callbacks(cib_conn);
+
+ if (cib_conn->state != cib_disconnected) {
+ cib_conn->cmds->set_secondary(cib_conn,
+ cib_scope_local|cib_discard_reply);
+ cib_conn->cmds->signoff(cib_conn);
+ }
+
+ crm_notice("Disconnected from the CIB manager");
+}
+
+/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
+void
+do_cib_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ static int cib_retries = 0;
+
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect;
+ void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced;
+ void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated;
+
+ int rc = pcmk_ok;
+
+ CRM_ASSERT(cib_conn != NULL);
+
+ if (pcmk_is_set(action, A_CIB_STOP)) {
+ if ((cib_conn->state != cib_disconnected)
+ && (pending_rsc_update != 0)) {
+
+ crm_info("Waiting for resource update %d to complete",
+ pending_rsc_update);
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+ controld_disconnect_cib_manager();
+ }
+
+ if (!pcmk_is_set(action, A_CIB_START)) {
+ return;
+ }
+
+ if (cur_state == S_STOPPING) {
+ crm_err("Ignoring request to connect to the CIB manager after "
+ "shutdown");
+ return;
+ }
+
+ rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD,
+ cib_command_nonblocking);
+
+ if (rc != pcmk_ok) {
+ // A short wait that usually avoids stalling the FSA
+ sleep(1);
+ rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD,
+ cib_command_nonblocking);
+ }
+
+ if (rc != pcmk_ok) {
+ crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
+
+ } else if (cib_conn->cmds->set_connection_dnotify(cib_conn,
+ dnotify_fn) != pcmk_ok) {
+ crm_err("Could not set dnotify callback");
+
+ } else if (cib_conn->cmds->add_notify_callback(cib_conn,
+ T_CIB_REPLACE_NOTIFY,
+ replace_cb) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback (replace)");
+
+ } else if (cib_conn->cmds->add_notify_callback(cib_conn,
+ T_CIB_DIFF_NOTIFY,
+ update_cb) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback (update)");
+
+ } else {
+ controld_set_fsa_input_flags(R_CIB_CONNECTED);
+ cib_retries = 0;
+ cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id,
+ NULL);
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
+ cib_retries++;
+
+ if (cib_retries < 30) {
+ crm_warn("Couldn't complete CIB registration %d times... "
+ "pause and retry", cib_retries);
+ controld_start_wait_timer();
+ crmd_fsa_stall(FALSE);
+
+ } else {
+ crm_err("Could not complete CIB registration %d times... "
+ "hard error", cib_retries);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+ }
+}
+
+#define MIN_CIB_OP_TIMEOUT (30)
+
+/*!
+ * \internal
+ * \brief Get the timeout (in seconds) that should be used with CIB operations
+ *
+ * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout
+ * environment variable, or 10 seconds times one more than the number of
+ * nodes in the cluster.
+ */
+unsigned int
+cib_op_timeout(void)
+{
+ static int env_timeout = -1;
+ unsigned int calculated_timeout = 0;
+
+ if (env_timeout == -1) {
+ const char *env = getenv("PCMK_cib_timeout");
+
+ pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT);
+ crm_trace("Minimum CIB op timeout: %ds (environment: %s)",
+ env_timeout, (env? env : "none"));
+ }
+
+ calculated_timeout = 1 + crm_active_peers();
+ if (crm_remote_peer_cache) {
+ calculated_timeout += g_hash_table_size(crm_remote_peer_cache);
+ }
+ calculated_timeout *= 10;
+
+ calculated_timeout = QB_MAX(calculated_timeout, env_timeout);
+ crm_trace("Calculated timeout: %us", calculated_timeout);
+
+ if (controld_globals.cib_conn) {
+ controld_globals.cib_conn->call_timeout = calculated_timeout;
+ }
+ return calculated_timeout;
+}
+
+/*!
+ * \internal
+ * \brief Get CIB call options to use local scope if primary is unavailable
+ *
+ * \return CIB call options
+ */
+int
+crmd_cib_smart_opt(void)
+{
+ int call_opt = cib_none;
+
+ if ((controld_globals.fsa_state == S_ELECTION)
+ || (controld_globals.fsa_state == S_PENDING)) {
+ crm_info("Sending update to local CIB in state: %s",
+ fsa_state2string(controld_globals.fsa_state));
+ cib__set_call_options(call_opt, "update", cib_scope_local);
+ }
+ return call_opt;
+}
+
+static void
+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ char *desc = user_data;
+
+ if (rc == 0) {
+ crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
+ } else {
+ crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
+ desc, call_id, pcmk_strerror(rc), rc);
+ }
+}
+
+// Searches for various portions of node_state to delete
+
+// Match a particular node's node_state (takes node name 1x)
+#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
+
+// Node's lrm section (name 1x)
+#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
+
+/* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock
+ * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x)
+ */
+#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \
+ "|" XPATH_NODE_STATE \
+ "//" XML_LRM_TAG_RESOURCE \
+ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \
+ "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]"
+
+// Node's transient_attributes section (name 1x)
+#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
+
+// Everything under node_state (name 1x)
+#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
+
+/* Unlocked history + transient attributes
+ * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x,
+ * name 1x)
+ */
+#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
+
+/*!
+ * \internal
+ * \brief Delete subsection of a node's CIB node_state
+ *
+ * \param[in] uname Desired node
+ * \param[in] section Subsection of node_state to delete
+ * \param[in] options CIB call options to use
+ */
+void
+controld_delete_node_state(const char *uname, enum controld_section_e section,
+ int options)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ char *xpath = NULL;
+ char *desc = NULL;
+
+ // Shutdown locks that started before this time are expired
+ long long expire = (long long) time(NULL)
+ - controld_globals.shutdown_lock_limit;
+
+ CRM_CHECK(uname != NULL, return);
+ switch (section) {
+ case controld_section_lrm:
+ xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+ desc = crm_strdup_printf("resource history for node %s", uname);
+ break;
+ case controld_section_lrm_unlocked:
+ xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
+ uname, uname, expire);
+ desc = crm_strdup_printf("resource history (other than shutdown "
+ "locks) for node %s", uname);
+ break;
+ case controld_section_attrs:
+ xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
+ desc = crm_strdup_printf("transient attributes for node %s", uname);
+ break;
+ case controld_section_all:
+ xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
+ desc = crm_strdup_printf("all state for node %s", uname);
+ break;
+ case controld_section_all_unlocked:
+ xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
+ uname, uname, expire, uname);
+ desc = crm_strdup_printf("all state (other than shutdown locks) "
+ "for node %s", uname);
+ break;
+ }
+
+ if (cib_conn == NULL) {
+ crm_warn("Unable to delete %s: no CIB connection", desc);
+ free(desc);
+ } else {
+ int call_id;
+
+ cib__set_call_options(options, "node state deletion",
+ cib_xpath|cib_multiple);
+ call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options);
+ crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
+ desc, call_id, xpath);
+ fsa_register_cib_callback(call_id, desc, cib_delete_callback);
+ // CIB library handles freeing desc
+ }
+ free(xpath);
+}
+
+// Takes node name and resource ID
+#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \
+ "[@" XML_ATTR_UNAME "='%s']/" \
+ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
+ "/" XML_LRM_TAG_RESOURCE \
+ "[@" XML_ATTR_ID "='%s']"
+// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks
+
+/*!
+ * \internal
+ * \brief Clear resource history from CIB for a given resource and node
+ *
+ * \param[in] rsc_id ID of resource to be cleared
+ * \param[in] node Node whose resource history should be cleared
+ * \param[in] user_name ACL user name to use
+ * \param[in] call_options CIB call options
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_delete_resource_history(const char *rsc_id, const char *node,
+ const char *user_name, int call_options)
+{
+ char *desc = NULL;
+ char *xpath = NULL;
+ int rc = pcmk_rc_ok;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL);
+
+ desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node);
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("Unable to clear %s: no CIB connection", desc);
+ free(desc);
+ return ENOTCONN;
+ }
+
+ // Ask CIB to delete the entry
+ xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id);
+ rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE,
+ NULL, xpath, NULL, NULL, call_options|cib_xpath,
+ user_name);
+
+ if (rc < 0) {
+ rc = pcmk_legacy2rc(rc);
+ crm_err("Could not delete resource status of %s on %s%s%s: %s "
+ CRM_XS " rc=%d", rsc_id, node,
+ (user_name? " for user " : ""), (user_name? user_name : ""),
+ pcmk_rc_str(rc), rc);
+ free(desc);
+ free(xpath);
+ return rc;
+ }
+
+ if (pcmk_is_set(call_options, cib_sync_call)) {
+ if (pcmk_is_set(call_options, cib_dryrun)) {
+ crm_debug("Deletion of %s would succeed", desc);
+ } else {
+ crm_debug("Deletion of %s succeeded", desc);
+ }
+ free(desc);
+
+ } else {
+ crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s",
+ desc, rc, xpath);
+ fsa_register_cib_callback(rc, desc, cib_delete_callback);
+ // CIB library handles freeing desc
+ }
+
+ free(xpath);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Build XML and string of parameters meeting some criteria, for digest
+ *
+ * \param[in] op Executor event with parameter table to use
+ * \param[in] metadata Parsed meta-data for executed resource agent
+ * \param[in] param_type Flag used for selection criteria
+ * \param[out] result Will be set to newly created XML with selected
+ * parameters as attributes
+ *
+ * \return Newly allocated space-separated string of parameter names
+ * \note Selection criteria vary by param_type: for the restart digest, we
+ * want parameters that are *not* marked reloadable (OCF 1.1) or that
+ * *are* marked unique (pre-1.1), for both string and XML results; for the
+ * secure digest, we want parameters that *are* marked private for the
+ * string, but parameters that are *not* marked private for the XML.
+ * \note It is the caller's responsibility to free the string return value with
+ * \p g_string_free() and the XML result with \p free_xml().
+ */
+static GString *
+build_parameter_list(const lrmd_event_data_t *op,
+ const struct ra_metadata_s *metadata,
+ enum ra_param_flags_e param_type, xmlNode **result)
+{
+ GString *list = NULL;
+
+ *result = create_xml_node(NULL, XML_TAG_PARAMS);
+
+ /* Consider all parameters except private ones, to be consistent with
+ * what the scheduler does in calculate_secure_digest().
+ */
+ if (param_type == ra_param_private
+ && compare_version(controld_globals.dc_version, "3.16.0") >= 0) {
+ g_hash_table_foreach(op->params, hash2field, *result);
+ pcmk__filter_op_for_digest(*result);
+ }
+
+ for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) {
+ struct ra_param_s *param = (struct ra_param_s *) iter->data;
+
+ bool accept_for_list = false;
+ bool accept_for_xml = false;
+
+ switch (param_type) {
+ case ra_param_reloadable:
+ accept_for_list = !pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = accept_for_list;
+ break;
+
+ case ra_param_unique:
+ accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = accept_for_list;
+ break;
+
+ case ra_param_private:
+ accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+ accept_for_xml = !accept_for_list;
+ break;
+ }
+
+ if (accept_for_list) {
+ crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
+
+ if (list == NULL) {
+ // We will later search for " WORD ", so start list with a space
+ pcmk__add_word(&list, 256, " ");
+ }
+ pcmk__add_word(&list, 0, param->rap_name);
+
+ } else {
+ crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
+ }
+
+ if (accept_for_xml) {
+ const char *v = g_hash_table_lookup(op->params, param->rap_name);
+
+ if (v != NULL) {
+ crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
+ crm_xml_add(*result, param->rap_name, v);
+ }
+
+ } else {
+ crm_trace("Removing attr %s from the xml result", param->rap_name);
+ xml_remove_prop(*result, param->rap_name);
+ }
+ }
+
+ if (list != NULL) {
+ // We will later search for " WORD ", so end list with a space
+ pcmk__add_word(&list, 0, " ");
+ }
+ return list;
+}
+
+static void
+append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
+ xmlNode *update, const char *version)
+{
+ GString *list = NULL;
+ char *digest = NULL;
+ xmlNode *restart = NULL;
+
+ CRM_LOG_ASSERT(op->params != NULL);
+
+ if (op->interval_ms > 0) {
+ /* monitors are not reloadable */
+ return;
+ }
+
+ if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) {
+ // Add parameters not marked reloadable to the "op-force-restart" list
+ list = build_parameter_list(op, metadata, ra_param_reloadable,
+ &restart);
+
+ } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) {
+ /* @COMPAT pre-OCF-1.1 resource agents
+ *
+ * Before OCF 1.1, Pacemaker abused "unique=0" to indicate
+ * reloadability. Add any parameters with unique="1" to the
+ * "op-force-restart" list.
+ */
+ list = build_parameter_list(op, metadata, ra_param_unique, &restart);
+
+ } else {
+ // Resource does not support agent reloads
+ return;
+ }
+
+ digest = calculate_operation_digest(restart, version);
+ /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
+ * no matter if it actually supports any parameters with unique="1"). */
+ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART,
+ (list == NULL)? "" : (const char *) list->str);
+ crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
+
+ if ((list != NULL) && (list->len > 0)) {
+ crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
+ } else {
+ crm_trace("%s: %s", op->rsc_id, digest);
+ }
+
+ if (list != NULL) {
+ g_string_free(list, TRUE);
+ }
+ free_xml(restart);
+ free(digest);
+}
+
+static void
+append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
+ xmlNode *update, const char *version)
+{
+ GString *list = NULL;
+ char *digest = NULL;
+ xmlNode *secure = NULL;
+
+ CRM_LOG_ASSERT(op->params != NULL);
+
+ /*
+ * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
+ * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
+ * the insecure ones
+ */
+ list = build_parameter_list(op, metadata, ra_param_private, &secure);
+
+ if (list != NULL) {
+ digest = calculate_operation_digest(secure, version);
+ crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str);
+ crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
+
+ crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
+ g_string_free(list, TRUE);
+ } else {
+ crm_trace("%s: no secure parameters", op->rsc_id);
+ }
+
+ free_xml(secure);
+ free(digest);
+}
+
+/*!
+ * \internal
+ * \brief Create XML for a resource history entry
+ *
+ * \param[in] func Function name of caller
+ * \param[in,out] parent XML to add entry to
+ * \param[in] rsc Affected resource
+ * \param[in,out] op Action to add an entry for (or NULL to do nothing)
+ * \param[in] node_name Node where action occurred
+ */
+void
+controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op,
+ const char *node_name)
+{
+ int target_rc = 0;
+ xmlNode *xml_op = NULL;
+ struct ra_metadata_s *metadata = NULL;
+ const char *caller_version = NULL;
+ lrm_state_t *lrm_state = NULL;
+
+ if (op == NULL) {
+ return;
+ }
+
+ target_rc = rsc_op_expected_rc(op);
+
+ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
+ CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
+
+ xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
+ controld_globals.our_nodename, func);
+ if (xml_op == NULL) {
+ return;
+ }
+
+ if ((rsc == NULL) || (op->params == NULL)
+ || !crm_op_needs_metadata(rsc->standard, op->op_type)) {
+
+ crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
+ op->op_type, op->rsc_id, op->params, rsc);
+ return;
+ }
+
+ lrm_state = lrm_state_find(node_name);
+ if (lrm_state == NULL) {
+ crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT
+ " because we have no connection to executor for %s",
+ op->rsc_id, op->op_type, op->interval_ms, node_name);
+ return;
+ }
+
+ /* Ideally the metadata is cached, and the agent is just a fallback.
+ *
+ * @TODO Go through all callers and ensure they get metadata asynchronously
+ * first.
+ */
+ metadata = controld_get_rsc_metadata(lrm_state, rsc,
+ controld_metadata_from_agent
+ |controld_metadata_from_cache);
+ if (metadata == NULL) {
+ return;
+ }
+
+ crm_trace("Including additional digests for %s:%s:%s",
+ rsc->standard, rsc->provider, rsc->type);
+ append_restart_list(op, metadata, xml_op, caller_version);
+ append_secure_list(op, metadata, xml_op, caller_version);
+
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Record an action as pending in the CIB, if appropriate
+ *
+ * \param[in] node_name Node where the action is pending
+ * \param[in] rsc Resource that action is for
+ * \param[in,out] op Pending action
+ *
+ * \return true if action was recorded in CIB, otherwise false
+ */
+bool
+controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op)
+{
+ const char *record_pending = NULL;
+
+ CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL),
+ return false);
+
+ // Never record certain operation types as pending
+ if ((op->op_type == NULL) || (op->params == NULL)
+ || !controld_action_is_recordable(op->op_type)) {
+ return false;
+ }
+
+ // Check action's record-pending meta-attribute (defaults to true)
+ record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
+ if ((record_pending != NULL) && !crm_is_true(record_pending)) {
+ return false;
+ }
+
+ op->call_id = -1;
+ op->t_run = time(NULL);
+ op->t_rcchange = op->t_run;
+
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+ crm_debug("Recording pending %s-interval %s for %s on %s in the CIB",
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
+ node_name);
+ controld_update_resource_history(node_name, rsc, op, 0);
+ return true;
+}
+
+static void
+cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ switch (rc) {
+ case pcmk_ok:
+ case -pcmk_err_diff_failed:
+ case -pcmk_err_diff_resync:
+ crm_trace("Resource update %d complete: rc=%d", call_id, rc);
+ break;
+ default:
+ crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
+ }
+
+ if (call_id == pending_rsc_update) {
+ pending_rsc_update = 0;
+ controld_trigger_fsa();
+ }
+}
+
+/* Only successful stops, and probes that found the resource inactive, get locks
+ * recorded in the history. This ensures the resource stays locked to the node
+ * until it is active there again after the node comes back up.
+ */
+static bool
+should_preserve_lock(lrmd_event_data_t *op)
+{
+ if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ return false;
+ }
+ if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) {
+ return true;
+ }
+ if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) {
+ return true;
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Request a CIB update
+ *
+ * \param[in] section Section of CIB to update
+ * \param[in,out] data New XML of CIB section to update
+ * \param[in] options CIB call options
+ * \param[in] callback If not NULL, set this as the operation callback
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
+ * stored in \p pending_rsc_update on success.
+ */
+int
+controld_update_cib(const char *section, xmlNode *data, int options,
+ void (*callback)(xmlNode *, int, int, xmlNode *, void *))
+{
+ int cib_rc = -ENOTCONN;
+
+ CRM_ASSERT(data != NULL);
+
+ if (controld_globals.cib_conn != NULL) {
+ cib_rc = cib_internal_op(controld_globals.cib_conn,
+ PCMK__CIB_REQUEST_MODIFY, NULL, section,
+ data, NULL, options, NULL);
+ if (cib_rc >= 0) {
+ crm_debug("Submitted CIB update %d for %s section",
+ cib_rc, section);
+ }
+ }
+
+ if (callback == NULL) {
+ if (cib_rc < 0) {
+ crm_err("Failed to update CIB %s section: %s",
+ section, pcmk_rc_str(pcmk_legacy2rc(cib_rc)));
+ }
+
+ } else {
+ if ((cib_rc >= 0) && (callback == cib_rsc_callback)) {
+ /* Checking for a particular callback is a little hacky, but it
+ * didn't seem worth adding an output argument for cib_rc for just
+ * one use case.
+ */
+ pending_rsc_update = cib_rc;
+ }
+ fsa_register_cib_callback(cib_rc, NULL, callback);
+ }
+
+ return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc);
+}
+
+/*!
+ * \internal
+ * \brief Update resource history entry in CIB
+ *
+ * \param[in] node_name Node where action occurred
+ * \param[in] rsc Resource that action is for
+ * \param[in,out] op Action to record
+ * \param[in] lock_time If nonzero, when resource was locked to node
+ *
+ * \note On success, the CIB update's call ID will be stored in
+ * pending_rsc_update.
+ */
+void
+controld_update_resource_history(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, time_t lock_time)
+{
+ xmlNode *update = NULL;
+ xmlNode *xml = NULL;
+ int call_opt = crmd_cib_smart_opt();
+ const char *node_id = NULL;
+ const char *container = NULL;
+
+ CRM_CHECK((node_name != NULL) && (op != NULL), return);
+
+ if (rsc == NULL) {
+ crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
+ controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
+ return;
+ }
+
+ // <status>
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+
+ // <node_state ...>
+ xml = create_xml_node(update, XML_CIB_TAG_STATE);
+ if (pcmk__str_eq(node_name, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ node_id = controld_globals.our_uuid;
+ } else {
+ node_id = node_name;
+ pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true);
+ }
+ crm_xml_add(xml, XML_ATTR_ID, node_id);
+ crm_xml_add(xml, XML_ATTR_UNAME, node_name);
+ crm_xml_add(xml, XML_ATTR_ORIGIN, __func__);
+
+ // <lrm ...>
+ xml = create_xml_node(xml, XML_CIB_TAG_LRM);
+ crm_xml_add(xml, XML_ATTR_ID, node_id);
+
+ // <lrm_resources>
+ xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES);
+
+ // <lrm_resource ...>
+ xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE);
+ crm_xml_add(xml, XML_ATTR_ID, op->rsc_id);
+ crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard);
+ crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider);
+ crm_xml_add(xml, XML_ATTR_TYPE, rsc->type);
+ if (lock_time != 0) {
+ /* Actions on a locked resource should either preserve the lock by
+ * recording it with the action result, or clear it.
+ */
+ if (!should_preserve_lock(op)) {
+ lock_time = 0;
+ }
+ crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ (long long) lock_time);
+ }
+ if (op->params != NULL) {
+ container = g_hash_table_lookup(op->params,
+ CRM_META "_" XML_RSC_ATTR_CONTAINER);
+ if (container != NULL) {
+ crm_trace("Resource %s is a part of container resource %s",
+ op->rsc_id, container);
+ crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container);
+ }
+ }
+
+ // <lrm_resource_op ...> (possibly more than one)
+ controld_add_resource_history_xml(xml, rsc, op, node_name);
+
+ /* Update the CIB asynchronously. Even if the update fails, the resource
+ * state should be discovered during the next election. Worst case, the node
+ * is wrongly fenced for supposedly running a resource that it isn't.
+ */
+ crm_log_xml_trace(update, __func__);
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback);
+ free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Erase an LRM history entry from the CIB, given the operation data
+ *
+ * \param[in] op Operation whose history should be deleted
+ */
+void
+controld_delete_action_history(const lrmd_event_data_t *op)
+{
+ xmlNode *xml_top = NULL;
+
+ CRM_CHECK(op != NULL, return);
+
+ xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
+ crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
+ crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
+
+ if (op->interval_ms > 0) {
+ char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
+
+ /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
+ crm_xml_add(xml_top, XML_ATTR_ID, op_id);
+ free(op_id);
+ }
+
+ crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)",
+ op->rsc_id, op->op_type, op->interval_ms, op->call_id);
+
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn,
+ XML_CIB_TAG_STATUS, xml_top,
+ cib_none);
+
+ crm_log_xml_trace(xml_top, "op:cancel");
+ free_xml(xml_top);
+}
+
+/* Define xpath to find LRM resource history entry by node and resource */
+#define XPATH_HISTORY \
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
+ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
+ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
+ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
+ "/" XML_LRM_TAG_RSC_OP
+
+/* ... and also by operation key */
+#define XPATH_HISTORY_ID XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s']"
+
+/* ... and also by operation key and operation call ID */
+#define XPATH_HISTORY_CALL XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
+
+/* ... and also by operation key and original operation key */
+#define XPATH_HISTORY_ORIG XPATH_HISTORY \
+ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
+
+/*!
+ * \internal
+ * \brief Delete a last_failure resource history entry from the CIB
+ *
+ * \param[in] rsc_id Name of resource to clear history for
+ * \param[in] node Name of node to clear history for
+ * \param[in] action If specified, delete only if this was failed action
+ * \param[in] interval_ms If \p action is specified, it has this interval
+ */
+void
+controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+ const char *action, guint interval_ms)
+{
+ char *xpath = NULL;
+ char *last_failure_key = NULL;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL), return);
+
+ // Generate XPath to match desired entry
+ last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
+ if (action == NULL) {
+ xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id,
+ last_failure_key);
+ } else {
+ char *action_key = pcmk__op_key(rsc_id, action, interval_ms);
+
+ xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id,
+ last_failure_key, action_key);
+ free(action_key);
+ }
+ free(last_failure_key);
+
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+ NULL, cib_xpath);
+ free(xpath);
+}
+
+/*!
+ * \internal
+ * \brief Delete resource history entry from the CIB, given operation key
+ *
+ * \param[in] rsc_id Name of resource to clear history for
+ * \param[in] node Name of node to clear history for
+ * \param[in] key Operation key of operation to clear history for
+ * \param[in] call_id If specified, delete entry only if it has this call ID
+ */
+void
+controld_delete_action_history_by_key(const char *rsc_id, const char *node,
+ const char *key, int call_id)
+{
+ char *xpath = NULL;
+
+ CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return);
+
+ if (call_id > 0) {
+ xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
+ call_id);
+ } else {
+ xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
+ }
+ controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+ NULL, cib_xpath);
+ free(xpath);
+}
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
new file mode 100644
index 0000000..bd9492a
--- /dev/null
+++ b/daemons/controld/controld_cib.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__CONTROLD_CIB__H
+#define PCMK__CONTROLD_CIB__H
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/common/xml.h>
+#include <crm/cib/internal.h> // PCMK__CIB_REQUEST_MODIFY
+#include "controld_globals.h" // controld_globals.cib_conn
+
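+/* Modify a section of the local CIB, creating the XML if it does not already
+ * exist (cib_can_create), without registering a reply callback
+ */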
+static inline void
+fsa_cib_anon_update(const char *section, xmlNode *data) {
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("No CIB connection available");
+ } else {
+ controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ section, data,
+ cib_scope_local|cib_can_create);
+ }
+}
+
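+/* Like fsa_cib_anon_update(), but additionally ask the CIB manager not to
+ * send a reply at all (cib_discard_reply)
+ */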
+static inline void
+fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
+ if (controld_globals.cib_conn == NULL) {
+ crm_err("No CIB connection available");
+ } else {
+ controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ section, data,
+ cib_scope_local
+ |cib_can_create
+ |cib_discard_reply);
+ }
+}
+
+void controld_record_cib_replace_call(int call_id);
+bool controld_forget_cib_replace_call(int call_id);
+void controld_forget_all_cib_replace_calls(void);
+void controld_destroy_cib_replacements_table(void);
+
+int controld_update_cib(const char *section, xmlNode *data, int options,
+ void (*callback)(xmlNode *, int, int, xmlNode *,
+ void *));
+unsigned int cib_op_timeout(void);
+
+// Subsections of node_state
+enum controld_section_e {
+ controld_section_lrm,
+ controld_section_lrm_unlocked,
+ controld_section_attrs,
+ controld_section_all,
+ controld_section_all_unlocked
+};
+
+void controld_delete_node_state(const char *uname,
+ enum controld_section_e section, int options);
+int controld_delete_resource_history(const char *rsc_id, const char *node,
+ const char *user_name, int call_options);
+
+/* Convenience macro for registering a CIB callback
+ * (assumes that data can be freed with free())
+ */
+# define fsa_register_cib_callback(id, data, fn) do { \
+ cib_t *cib_conn = controld_globals.cib_conn; \
+ \
+ CRM_ASSERT(cib_conn != NULL); \
+ cib_conn->cmds->register_callback_full(cib_conn, id, cib_op_timeout(), \
+ FALSE, data, #fn, fn, free); \
+ } while(0)
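+
+/* Illustrative usage, modeled on crm_read_options() in controld_control.c
+ * (my_callback here is a hypothetical handler with the signature expected by
+ * register_callback_full()):
+ *
+ *   int call_id = controld_globals.cib_conn->cmds->query(
+ *       controld_globals.cib_conn, NULL, NULL, cib_scope_local);
+ *
+ *   fsa_register_cib_callback(call_id, NULL, my_callback);
+ */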
+
+void controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op,
+ const char *node_name);
+
+#define controld_add_resource_history_xml(parent, rsc, op, node_name) \
+ controld_add_resource_history_xml_as(__func__, (parent), (rsc), \
+ (op), (node_name))
+
+bool controld_record_pending_op(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op);
+
+void controld_update_resource_history(const char *node_name,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, time_t lock_time);
+
+void controld_delete_action_history(const lrmd_event_data_t *op);
+
+void controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+ const char *action, guint interval_ms);
+
+void controld_delete_action_history_by_key(const char *rsc_id, const char *node,
+ const char *key, int call_id);
+
+void controld_disconnect_cib_manager(void);
+
+int crmd_cib_smart_opt(void);
+
+/*!
+ * \internal
+ * \brief Check whether an action type should be recorded in the CIB
+ *
+ * \param[in] action Action type
+ *
+ * \return true if action should be recorded, false otherwise
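+ *
+ * \note For example, per the checks below, start/stop/monitor results are
+ *       recordable, while cancel, delete, notify, and metadata actions are not.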
+ */
+static inline bool
+controld_action_is_recordable(const char *action)
+{
+ return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE,
+ CRMD_ACTION_NOTIFY, CRMD_ACTION_METADATA, NULL);
+}
+
+#endif // PCMK__CONTROLD_CIB__H
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
new file mode 100644
index 0000000..ffc62a0
--- /dev/null
+++ b/daemons/controld/controld_control.c
@@ -0,0 +1,857 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+static qb_ipcs_service_t *ipcs = NULL;
+
+static crm_trigger_t *config_read_trigger = NULL;
+
+#if SUPPORT_COROSYNC
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+#endif
+
+void crm_shutdown(int nsig);
+static gboolean crm_read_options(gpointer user_data);
+
+/* A_HA_CONNECT */
+void
+do_ha_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean registered = FALSE;
+ static crm_cluster_t *cluster = NULL;
+
+ if (cluster == NULL) {
+ cluster = pcmk_cluster_new();
+ }
+
+ if (action & A_HA_DISCONNECT) {
+ crm_cluster_disconnect(cluster);
+ crm_info("Disconnected from the cluster");
+
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+ }
+
+ if (action & A_HA_CONNECT) {
+ crm_set_status_callback(&peer_update_callback);
+ crm_set_autoreap(FALSE);
+
+#if SUPPORT_COROSYNC
+ if (is_corosync_cluster()) {
+ registered = crm_connect_corosync(cluster);
+ }
+#endif // SUPPORT_COROSYNC
+
+ if (registered) {
+ controld_election_init(cluster->uname);
+ controld_globals.our_nodename = cluster->uname;
+ controld_globals.our_uuid = cluster->uuid;
+            if (cluster->uuid == NULL) {
+ crm_err("Could not obtain local uuid");
+ registered = FALSE;
+ }
+ }
+
+ if (!registered) {
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ return;
+ }
+
+ populate_cib_nodes(node_update_none, __func__);
+ controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
+ crm_info("Connected to the cluster");
+ }
+
+ if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
+ crm_err("Unexpected action %s in %s", fsa_action2string(action),
+ __func__);
+ }
+}
+
+/* A_SHUTDOWN */
+void
+do_shutdown(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ /* just in case */
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ controld_disconnect_fencer(FALSE);
+}
+
+/* A_SHUTDOWN_REQ */
+void
+do_shutdown_req(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *msg = NULL;
+
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ //controld_set_fsa_input_flags(R_STAYDOWN);
+ crm_info("Sending shutdown request to all peers (DC is %s)",
+ pcmk__s(controld_globals.dc_name, "not set"));
+ msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+ free_xml(msg);
+}
+
+void
+crmd_fast_exit(crm_exit_t exit_code)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
+ crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
+ exit_code, CRM_EX_FATAL);
+ exit_code = CRM_EX_FATAL;
+
+ } else if ((exit_code == CRM_EX_OK)
+ && pcmk_is_set(controld_globals.fsa_input_register,
+ R_IN_RECOVERY)) {
+ crm_err("Could not recover from internal error");
+ exit_code = CRM_EX_ERROR;
+ }
+
+ if (controld_globals.logger_out != NULL) {
+ controld_globals.logger_out->finish(controld_globals.logger_out,
+ exit_code, true, NULL);
+ pcmk__output_free(controld_globals.logger_out);
+ controld_globals.logger_out = NULL;
+ }
+
+ crm_exit(exit_code);
+}
+
+crm_exit_t
+crmd_exit(crm_exit_t exit_code)
+{
+ GMainLoop *mloop = controld_globals.mainloop;
+
+ static bool in_progress = FALSE;
+
+ if (in_progress && (exit_code == CRM_EX_OK)) {
+ crm_debug("Exit is already in progress");
+ return exit_code;
+
+    } else if (in_progress) {
+ crm_notice("Error during shutdown process, exiting now with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ crm_write_blackbox(SIGTRAP, NULL);
+ crmd_fast_exit(exit_code);
+ }
+
+ in_progress = TRUE;
+ crm_trace("Preparing to exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+
+ /* Suppress secondary errors resulting from us disconnecting everything */
+ controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+
+/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
+
+    if (ipcs != NULL) {
+ crm_trace("Closing IPC server");
+ mainloop_del_ipc_server(ipcs);
+ ipcs = NULL;
+ }
+
+ controld_close_attrd_ipc();
+ controld_shutdown_schedulerd_ipc();
+ controld_disconnect_fencer(TRUE);
+
+ if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
+ crm_debug("No mainloop detected");
+ exit_code = CRM_EX_ERROR;
+ }
+
+    /* On an error, just get out.
+     *
+     * Otherwise, make the effort to have the mainloop exit gracefully so
+     * that it (mostly) cleans up after itself and valgrind has less
+     * to report on, allowing real errors to stand out.
+     */
+ if (exit_code != CRM_EX_OK) {
+ crm_notice("Forcing immediate exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ crm_write_blackbox(SIGTRAP, NULL);
+ crmd_fast_exit(exit_code);
+ }
+
+/* Clean up as much memory as possible for valgrind */
+
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
+
+ crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
+ fsa_input2string(fsa_data->fsa_input),
+ fsa_state2string(controld_globals.fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ delete_fsa_input(fsa_data);
+ }
+
+ controld_clear_fsa_input_flags(R_MEMBERSHIP);
+
+ g_list_free(controld_globals.fsa_message_queue);
+ controld_globals.fsa_message_queue = NULL;
+
+ controld_election_fini();
+
+ /* Tear down the CIB manager connection, but don't free it yet -- it could
+ * be used when we drain the mainloop later.
+ */
+
+ controld_disconnect_cib_manager();
+
+ verify_stopped(controld_globals.fsa_state, LOG_WARNING);
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+ lrm_state_destroy_all();
+
+ mainloop_destroy_trigger(config_read_trigger);
+ config_read_trigger = NULL;
+
+ controld_destroy_fsa_trigger();
+ controld_destroy_transition_trigger();
+
+ pcmk__client_cleanup();
+ crm_peer_destroy();
+
+ controld_free_fsa_timers();
+ te_cleanup_stonith_history_sync(NULL, TRUE);
+ controld_free_sched_timer();
+
+ free(controld_globals.our_nodename);
+ controld_globals.our_nodename = NULL;
+
+ free(controld_globals.our_uuid);
+ controld_globals.our_uuid = NULL;
+
+ free(controld_globals.dc_name);
+ controld_globals.dc_name = NULL;
+
+ free(controld_globals.dc_version);
+ controld_globals.dc_version = NULL;
+
+ free(controld_globals.cluster_name);
+ controld_globals.cluster_name = NULL;
+
+ free(controld_globals.te_uuid);
+ controld_globals.te_uuid = NULL;
+
+ free_max_generation();
+ controld_destroy_cib_replacements_table();
+ controld_destroy_failed_sync_table();
+ controld_destroy_outside_events_table();
+
+ mainloop_destroy_signal(SIGPIPE);
+ mainloop_destroy_signal(SIGUSR1);
+ mainloop_destroy_signal(SIGTERM);
+ mainloop_destroy_signal(SIGTRAP);
+ /* leave SIGCHLD engaged as we might still want to drain some service-actions */
+
+ if (mloop) {
+ GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
+
+ /* Don't re-enter this block */
+ controld_globals.mainloop = NULL;
+
+ /* no signals on final draining anymore */
+ mainloop_destroy_signal(SIGCHLD);
+
+ crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
+
+ {
+ int lpc = 0;
+
+            while (g_main_context_pending(ctx) && (lpc < 10)) {
+ lpc++;
+ crm_trace("Iteration %d", lpc);
+ g_main_context_dispatch(ctx);
+ }
+ }
+
+ crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
+ g_main_loop_quit(mloop);
+
+ /* Won't do anything yet, since we're inside it now */
+ g_main_loop_unref(mloop);
+ } else {
+ mainloop_destroy_signal(SIGCHLD);
+ }
+
+ cib_delete(controld_globals.cib_conn);
+ controld_globals.cib_conn = NULL;
+
+ throttle_fini();
+
+ /* Graceful */
+ crm_trace("Done preparing for exit with status %d (%s)",
+ exit_code, crm_exit_str(exit_code));
+ return exit_code;
+}
+
+/* A_EXIT_0, A_EXIT_1 */
+void
+do_exit(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_exit_t exit_code = CRM_EX_OK;
+ int log_level = LOG_INFO;
+ const char *exit_type = "gracefully";
+
+ if (action & A_EXIT_1) {
+ log_level = LOG_ERR;
+ exit_type = "forcefully";
+ exit_code = CRM_EX_ERROR;
+ }
+
+ verify_stopped(cur_state, LOG_ERR);
+ do_crm_log(log_level, "Performing %s - %s exiting the controller",
+ fsa_action2string(action), exit_type);
+
+ crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
+ crmd_exit(exit_code);
+}
+
+static void sigpipe_ignore(int nsig) { return; }
+
+/* A_STARTUP */
+void
+do_startup(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_debug("Registering Signal Handlers");
+ mainloop_add_signal(SIGTERM, crm_shutdown);
+ mainloop_add_signal(SIGPIPE, sigpipe_ignore);
+
+ config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
+ crm_read_options, NULL);
+
+ controld_init_fsa_trigger();
+ controld_init_transition_trigger();
+
+ crm_debug("Creating CIB manager and executor objects");
+ controld_globals.cib_conn = cib_new();
+
+ lrm_state_init_local();
+ if (controld_init_fsa_timers() == FALSE) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
+{
+ crm_trace("Accepting new IPC client connection");
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
+
+ if (msg == NULL) {
+ pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+ }
+ pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
+
+ CRM_ASSERT(client->user != NULL);
+ pcmk__update_acl_user(msg, F_CRM_USER, client->user);
+
+ crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
+ if (controld_authorize_ipc_message(msg, client, NULL)) {
+ crm_trace("Processing IPC message from client %s",
+ pcmk__client_name(client));
+ route_message(C_IPC_MESSAGE, msg);
+ }
+
+ controld_trigger_fsa();
+ free_xml(msg);
+ return 0;
+}
+
+static int32_t
+ipc_client_disconnected(qb_ipcs_connection_t *c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client) {
+ crm_trace("Disconnecting %sregistered client %s (%p/%p)",
+ (client->userdata? "" : "un"), pcmk__client_name(client),
+ c, client);
+ free(client->userdata);
+ pcmk__free_client(client);
+ controld_trigger_fsa();
+ }
+ return 0;
+}
+
+static void
+ipc_connection_destroyed(qb_ipcs_connection_t *c)
+{
+ crm_trace("Connection %p", c);
+ ipc_client_disconnected(c);
+}
+
+/* A_STOP */
+void
+do_stop(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_trace("Closing IPC server");
+    mainloop_del_ipc_server(ipcs);
+    ipcs = NULL;
+ register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
+}
+
+/* A_STARTED */
+void
+do_started(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ static struct qb_ipcs_service_handlers crmd_callbacks = {
+ .connection_accept = accept_controller_client,
+ .connection_created = NULL,
+ .msg_process = dispatch_controller_ipc,
+ .connection_closed = ipc_client_disconnected,
+ .connection_destroyed = ipc_connection_destroyed
+ };
+
+ if (cur_state != S_STARTING) {
+ crm_err("Start cancelled... %s", fsa_state2string(cur_state));
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_MEMBERSHIP)) {
+ crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_LRM_CONNECTED)) {
+ crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_CIB_CONNECTED)) {
+ crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register,
+ R_READ_CONFIG)) {
+ crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
+
+ crmd_fsa_stall(TRUE);
+ return;
+
+ } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
+
+ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
+ crmd_fsa_stall(TRUE);
+ return;
+ }
+
+ crm_debug("Init server comms");
+ ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
+ if (ipcs == NULL) {
+ crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ } else {
+ crm_notice("Pacemaker controller successfully started and accepting connections");
+ }
+ controld_trigger_fencer_connect();
+
+ controld_clear_fsa_input_flags(R_STARTING);
+ register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
+}
+
+/* A_RECOVER */
+void
+do_recover(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ controld_set_fsa_input_flags(R_IN_RECOVERY);
+ crm_warn("Fast-tracking shutdown in response to errors");
+
+ register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
+}
+
+static pcmk__cluster_option_t controller_options[] = {
+ /* name, old name, type, allowed values,
+ * default value, validator,
+ * short description,
+ * long description
+ */
+ {
+ "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
+ N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
+ N_("Includes a hash which identifies the exact changeset the code was "
+ "built from. Used for diagnostic purposes.")
+ },
+ {
+ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
+ N_("The messaging stack on which Pacemaker is currently running"),
+ N_("Used for informational and diagnostic purposes.")
+ },
+ {
+ "cluster-name", NULL, "string", NULL, NULL, NULL,
+ N_("An arbitrary name for the cluster"),
+ N_("This optional value is mostly for users' convenience as desired "
+ "in administration, but may also be used in Pacemaker "
+ "configuration rules via the #cluster-name node attribute, and "
+ "by higher-level tools and resource agents.")
+ },
+ {
+ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
+ NULL, "20s", pcmk__valid_interval_spec,
+ N_("How long to wait for a response from other nodes during start-up"),
+ N_("The optimal value will depend on the speed and load of your network "
+ "and the type of switches used.")
+ },
+ {
+ XML_CONFIG_ATTR_RECHECK, NULL, "time",
+ N_("Zero disables polling, while positive values are an interval in seconds"
+ "(unless other units are specified, for example \"5min\")"),
+ "15min", pcmk__valid_interval_spec,
+ N_("Polling interval to recheck cluster state and evaluate rules "
+ "with date specifications"),
+ N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
+ "recheck cluster state for failure timeouts and most time-based "
+ "rules. However, it will also recheck the cluster after this "
+ "amount of inactivity, to evaluate rules with date specifications "
+ "and serve as a fail-safe for certain types of scheduler bugs.")
+ },
+ {
+ "load-threshold", NULL, "percentage", NULL,
+ "80%", pcmk__valid_percentage,
+ N_("Maximum amount of system load that should be used by cluster nodes"),
+ N_("The cluster will slow down its recovery process when the amount of "
+ "system resources used (currently CPU) approaches this limit"),
+ },
+ {
+ "node-action-limit", NULL, "integer", NULL,
+ "0", pcmk__valid_number,
+ N_("Maximum number of jobs that can be scheduled per node "
+ "(defaults to 2x cores)")
+ },
+ { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
+ N_("How a cluster node should react if notified of its own fencing"),
+ N_("A cluster node may receive notification of its own fencing if fencing "
+ "is misconfigured, or if fabric fencing is in use that doesn't cut "
+ "cluster communication. Allowed values are \"stop\" to attempt to "
+ "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
+ "to immediately reboot the local node, falling back to stop on failure.")
+ },
+ {
+ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
+ "2min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("Declare an election failed if it is not decided within this much "
+ "time. If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
+ "20min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("Exit immediately if shutdown does not complete within this much "
+ "time. If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
+ "3min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
+ "30min", pcmk__valid_interval_spec,
+ "*** Advanced Use Only ***",
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
+ },
+ {
+ "transition-delay", "crmd-transition-delay", "time", NULL,
+ "0s", pcmk__valid_interval_spec,
+ N_("*** Advanced Use Only *** Enabling this option will slow down "
+ "cluster recovery under all conditions"),
+ N_("Delay cluster recovery for this much time to allow for additional "
+ "events to occur. Useful if your configuration is sensitive to "
+ "the order in which ping updates arrive.")
+ },
+ {
+ "stonith-watchdog-timeout", NULL, "time", NULL,
+ "0", controld_verify_stonith_watchdog_timeout,
+ N_("How long before nodes can be assumed to be safely down when "
+ "watchdog-based self-fencing via SBD is in use"),
+ N_("If this is set to a positive value, lost nodes are assumed to "
+ "self-fence using watchdog-based SBD within this much time. This "
+ "does not require a fencing resource to be explicitly configured, "
+ "though a fence_watchdog resource can be configured, to limit use "
+ "to specific nodes. If this is set to 0 (the default), the cluster "
+ "will never assume watchdog-based self-fencing. If this is set to a "
+ "negative value, the cluster will use twice the local value of the "
+ "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
+ "or otherwise treat this as 0. WARNING: When used, this timeout "
+ "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
+ "watchdog-based SBD, and Pacemaker will refuse to start on any of "
+ "those nodes where this is not true for the local value or SBD is "
+ "not active. When this is set to a negative value, "
+ "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
+ "that use SBD, otherwise data corruption or loss could occur.")
+ },
+ {
+ "stonith-max-attempts", NULL, "integer", NULL,
+ "10", pcmk__valid_positive_number,
+ N_("How many times fencing can fail before it will no longer be "
+ "immediately re-attempted on a target")
+ },
+
+ // Already documented in libpe_status (other values must be kept identical)
+ {
+ "no-quorum-policy", NULL, "select",
+ "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
+ N_("What to do when the cluster does not have quorum"), NULL
+ },
+ {
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
+ "false", pcmk__valid_boolean,
+ N_("Whether to lock resources to a cleanly shut down node"),
+ N_("When true, resources active on a node when it is cleanly shut down "
+ "are kept \"locked\" to that node (not allowed to run elsewhere) "
+ "until they start again on that node after it rejoins (or for at "
+ "most shutdown-lock-limit, if set). Stonith resources and "
+ "Pacemaker Remote connections are never locked. Clone and bundle "
+ "instances and the promoted role of promotable clones are "
+ "currently never locked, though support could be added in a future "
+ "release.")
+ },
+ {
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
+ "0", pcmk__valid_interval_spec,
+ N_("Do not lock resources to a cleanly shut down node longer than "
+ "this"),
+ N_("If shutdown-lock is true and this is set to a nonzero time "
+ "duration, shutdown locks will expire after this much time has "
+ "passed since the shutdown was initiated, even if the node has not "
+ "rejoined.")
+ },
+};
+
+void
+crmd_metadata(void)
+{
+ const char *desc_short = "Pacemaker controller options";
+ const char *desc_long = "Cluster options used by Pacemaker's controller";
+
+ gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
+ desc_long, controller_options,
+ PCMK__NELEM(controller_options));
+ printf("%s", s);
+ g_free(s);
+}
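+
+/* This is presumably what "pacemaker-controld metadata" prints on the command
+ * line, which higher-level tools can use to discover the supported options
+ */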
+
+static void
+config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ const char *value = NULL;
+ GHashTable *config_hash = NULL;
+ crm_time_t *now = crm_time_new(NULL);
+ xmlNode *crmconfig = NULL;
+ xmlNode *alerts = NULL;
+
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
+ crm_err("The cluster is mis-configured - shutting down and staying down");
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ }
+ goto bail;
+ }
+
+ crmconfig = output;
+ if ((crmconfig) &&
+ (crm_element_name(crmconfig)) &&
+ (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
+ crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
+ }
+ if (!crmconfig) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ goto bail;
+ }
+
+ crm_debug("Call %d : Parsing CIB options", call_id);
+ config_hash = pcmk__strkey_table(free, free);
+ pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
+ config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
+
+ // Validate all options, and use defaults if not already present in hash
+ pcmk__validate_cluster_options(config_hash, controller_options,
+ PCMK__NELEM(controller_options));
+
+ value = g_hash_table_lookup(config_hash, "no-quorum-policy");
+ if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
+ controld_set_global_flags(controld_no_quorum_suicide);
+ }
+
+ value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
+ if (crm_is_true(value)) {
+ controld_set_global_flags(controld_shutdown_lock_enabled);
+ } else {
+ controld_clear_global_flags(controld_shutdown_lock_enabled);
+ }
+
+ value = g_hash_table_lookup(config_hash,
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
+ controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
+ / 1000;
+
+ value = g_hash_table_lookup(config_hash, "cluster-name");
+ pcmk__str_update(&(controld_globals.cluster_name), value);
+
+ // Let subcomponents initialize their own static variables
+ controld_configure_election(config_hash);
+ controld_configure_fencing(config_hash);
+ controld_configure_fsa_timers(config_hash);
+ controld_configure_throttle(config_hash);
+
+ alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
+ crmd_unpack_alerts(alerts);
+
+ controld_set_fsa_input_flags(R_READ_CONFIG);
+ controld_trigger_fsa();
+
+ g_hash_table_destroy(config_hash);
+ bail:
+ crm_time_free(now);
+}
+
+/*!
+ * \internal
+ * \brief Trigger read and processing of the configuration
+ *
+ * \param[in] fn Calling function name
+ * \param[in] line Line number where call occurred
+ */
+void
+controld_trigger_config_as(const char *fn, int line)
+{
+ if (config_read_trigger != NULL) {
+ crm_trace("%s:%d - Triggered config processing", fn, line);
+ mainloop_set_trigger(config_read_trigger);
+ }
+}
+
+gboolean
+crm_read_options(gpointer user_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+ int call_id = cib_conn->cmds->query(cib_conn,
+ "//" XML_CIB_TAG_CRMCONFIG
+ " | //" XML_CIB_TAG_ALERTS,
+ NULL, cib_xpath|cib_scope_local);
+
+ fsa_register_cib_callback(call_id, NULL, config_query_callback);
+ crm_trace("Querying the CIB... call %d", call_id);
+ return TRUE;
+}
+
+/* A_READCONFIG */
+void
+do_read_config(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ throttle_init();
+ controld_trigger_config();
+}
+
+void
+crm_shutdown(int nsig)
+{
+ const char *value = NULL;
+ guint default_period_ms = 0;
+
+ if ((controld_globals.mainloop == NULL)
+ || !g_main_loop_is_running(controld_globals.mainloop)) {
+ crmd_exit(CRM_EX_OK);
+ return;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Escalating shutdown");
+ register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
+ return;
+ }
+
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+
+ /* If shutdown timer doesn't have a period set, use the default
+ *
+ * @TODO: Evaluate whether this is still necessary. As long as
+ * config_query_callback() has been run at least once, it doesn't look like
+ * anything could have changed the timer period since then.
+ */
+ value = pcmk__cluster_option(NULL, controller_options,
+ PCMK__NELEM(controller_options),
+ XML_CONFIG_ATTR_FORCE_QUIT);
+ default_period_ms = crm_parse_interval_spec(value);
+ controld_shutdown_start_countdown(default_period_ms);
+}
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
new file mode 100644
index 0000000..4378b30
--- /dev/null
+++ b/daemons/controld/controld_corosync.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#if SUPPORT_COROSYNC
+
+extern void post_cache_update(int seq);
+
+/* A_HA_CONNECT */
+
+static void
+crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+    if (data == NULL) {
+ return;
+ }
+ if (kind == crm_class_cluster) {
+ crm_node_t *peer = NULL;
+ xmlNode *xml = string2xml(data);
+
+ if (xml == NULL) {
+ crm_err("Could not parse message content (%d): %.100s", kind, data);
+ free(data);
+ return;
+ }
+
+ crm_xml_add(xml, F_ORIG, from);
+ /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */
+
+ peer = crm_get_peer(0, from);
+ if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
+ /* If we can still talk to our peer process on that node,
+ * then it must be part of the corosync membership
+ */
+ crm_warn("Receiving messages from a node we think is dead: %s[%d]",
+ peer->uname, peer->id);
+ crm_update_peer_proc(__func__, peer, crm_proc_cpg,
+ ONLINESTATUS);
+ }
+ crmd_ha_msg_filter(xml);
+ free_xml(xml);
+ } else {
+ crm_err("Invalid message class (%d): %.100s", kind, data);
+ }
+ free(data);
+}
+
+static gboolean
+crmd_quorum_callback(unsigned long long seq, gboolean quorate)
+{
+ crm_update_quorum(quorate, FALSE);
+ post_cache_update(seq);
+ return TRUE;
+}
+
+static void
+crmd_cs_destroy(gpointer user_data)
+{
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
+ crm_crit("Lost connection to cluster layer, shutting down");
+ crmd_exit(CRM_EX_DISCONNECT);
+
+ } else {
+ crm_info("Corosync connection closed");
+ }
+}
+
+/*!
+ * \brief Handle a Corosync notification of a CPG configuration change
+ *
+ * \param[in] handle CPG connection
+ * \param[in] cpg_name CPG group name
+ * \param[in] member_list List of current CPG members
+ * \param[in] member_list_entries Number of entries in \p member_list
+ * \param[in] left_list List of CPG members that left
+ * \param[in] left_list_entries Number of entries in \p left_list
+ * \param[in] joined_list List of CPG members that joined
+ * \param[in] joined_list_entries Number of entries in \p joined_list
+ */
+static void
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ /* When nodes leave CPG, the DC clears their transient node attributes.
+ *
+ * However if there is no DC, or the DC is among the nodes that left, each
+ * remaining node needs to do the clearing, to ensure it gets done.
+ * Otherwise, the attributes would persist when the nodes rejoin, which
+ * could have serious consequences for unfencing, agents that use attributes
+ * for internal logic, etc.
+ *
+ * Here, we set a global boolean if the DC is among the nodes that left, for
+ * use by the peer callback.
+ */
+ if (controld_globals.dc_name != NULL) {
+ crm_node_t *peer = NULL;
+
+ peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name);
+ if (peer != NULL) {
+ for (int i = 0; i < left_list_entries; ++i) {
+ if (left_list[i].nodeid == peer->id) {
+ controld_set_global_flags(controld_dc_left);
+ break;
+ }
+ }
+ }
+ }
+
+ // Process the change normally, which will call the peer callback as needed
+ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
+ controld_clear_global_flags(controld_dc_left);
+}
+
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+
+gboolean
+crm_connect_corosync(crm_cluster_t * cluster)
+{
+ if (is_corosync_cluster()) {
+ crm_set_status_callback(&peer_update_callback);
+ cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
+ cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
+ cluster->destroy = crmd_cs_destroy;
+
+ if (crm_cluster_connect(cluster)) {
+ pcmk__corosync_quorum_connect(crmd_quorum_callback,
+ crmd_cs_destroy);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+#endif
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
new file mode 100644
index 0000000..5f33d5b
--- /dev/null
+++ b/daemons/controld/controld_election.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/crm.h>
+
+#include <pacemaker-controld.h>
+
+static election_t *fsa_election = NULL;
+
+static gboolean
+election_win_cb(gpointer data)
+{
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
+ return FALSE;
+}
+
+void
+controld_election_init(const char *uname)
+{
+ fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
+}
+
+/*!
+ * \internal
+ * \brief Configure election options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_election(GHashTable *options)
+{
+ const char *value = NULL;
+
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_ELECTION_FAIL);
+ election_timeout_set_period(fsa_election, crm_parse_interval_spec(value));
+}
+
+void
+controld_remove_voter(const char *uname)
+{
+ election_remove(fsa_election, uname);
+
+ if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) {
+ /* Clear any election dampening in effect. Otherwise, if the lost DC had
+ * just won, an immediate new election could fizzle out with no new DC.
+ */
+ election_clear_dampening(fsa_election);
+ }
+}
+
+void
+controld_election_fini(void)
+{
+ election_fini(fsa_election);
+ fsa_election = NULL;
+}
+
+void
+controld_stop_current_election_timeout(void)
+{
+ election_timeout_stop(fsa_election);
+}
+
+/* A_ELECTION_VOTE */
+void
+do_election_vote(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean not_voting = FALSE;
+
+ /* don't vote if we're in one of these states or wanting to shut down */
+ switch (cur_state) {
+ case S_STARTING:
+ case S_RECOVERY:
+ case S_STOPPING:
+ case S_TERMINATE:
+ crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
+ not_voting = TRUE;
+ break;
+ case S_ELECTION:
+ case S_INTEGRATION:
+ case S_RELEASE_DC:
+ break;
+ default:
+ crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
+ break;
+ }
+
+ if (not_voting == FALSE) {
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
+ not_voting = TRUE;
+ }
+ }
+
+ if (not_voting) {
+ if (AM_I_DC) {
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+
+ } else {
+ register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+ }
+ return;
+ }
+
+ election_vote(fsa_election);
+ return;
+}
+
+void
+do_election_check(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (controld_globals.fsa_state == S_ELECTION) {
+ election_check(fsa_election);
+ } else {
+ crm_debug("Ignoring election check because we are not in an election");
+ }
+}
+
+/* A_ELECTION_COUNT */
+void
+do_election_count_vote(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ enum election_result rc = 0;
+ ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
+
+    if (crm_peer_cache == NULL) {
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Internal error, no peer cache");
+ }
+ return;
+ }
+
+ rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
+    switch (rc) {
+ case election_start:
+ election_reset(fsa_election);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ break;
+
+ case election_lost:
+ update_dc(NULL);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+ cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
+
+ } else if (cur_state != S_STARTING) {
+ register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+ }
+ break;
+
+ default:
+ crm_trace("Election message resulted in state %d", rc);
+ }
+}
+
+static void
+feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_notice("Feature update failed: %s "CRM_XS" rc=%d",
+ pcmk_strerror(rc), rc);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update a node attribute in the CIB during a DC takeover
+ *
+ * \param[in] name Name of attribute to update
+ * \param[in] value New attribute value
+ */
+#define dc_takeover_update_attr(name, value) do { \
+ cib__update_node_attr(controld_globals.logger_out, \
+ controld_globals.cib_conn, cib_none, \
+ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \
+ name, value, NULL, NULL); \
+ } while (0)
+
+/* A_DC_TAKEOVER */
+void
+do_dc_takeover(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *cib = NULL;
+ const char *cluster_type = name_for_cluster_type(get_cluster_type());
+ pid_t watchdog = pcmk__locate_sbd();
+
+ crm_info("Taking over DC status for this partition");
+ controld_set_fsa_input_flags(R_THE_DC);
+ execute_stonith_cleanup();
+
+ election_reset(fsa_election);
+ controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE);
+
+ controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn,
+ cib_scope_local);
+
+ cib = create_xml_node(NULL, XML_TAG_CIB);
+ crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback);
+
+ dc_takeover_update_attr(XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
+ dc_takeover_update_attr("dc-version", PACEMAKER_VERSION "-" BUILD_VERSION);
+ dc_takeover_update_attr("cluster-infrastructure", cluster_type);
+
+#if SUPPORT_COROSYNC
+ if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) {
+ char *cluster_name = pcmk__corosync_cluster_name();
+
+ if (cluster_name != NULL) {
+ dc_takeover_update_attr("cluster-name", cluster_name);
+ }
+ free(cluster_name);
+ }
+#endif
+
+ controld_trigger_config();
+ free_xml(cib);
+}
+
+/* A_DC_RELEASE */
+void
+do_dc_release(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (action & A_DC_RELEASE) {
+ crm_debug("Releasing the role of DC");
+ controld_clear_fsa_input_flags(R_THE_DC);
+ controld_expect_sched_reply(NULL);
+
+ } else if (action & A_DC_RELEASED) {
+ crm_info("DC role released");
+#if 0
+ if (are there errors) {
+ /* we can't stay up if not healthy */
+ /* or perhaps I_ERROR and go to S_RECOVER? */
+ result = I_SHUTDOWN;
+ }
+#endif
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ xmlNode *update = NULL;
+ crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename);
+
+ pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
+ update = create_node_state_update(node, node_update_expected, NULL,
+ __func__);
+ /* Don't need a based response because controld will stop. */
+ fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update);
+ free_xml(update);
+ }
+ register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
+
+ } else {
+ crm_err("Unknown DC action %s", fsa_action2string(action));
+ }
+
+ crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
+}
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
new file mode 100644
index 0000000..0de399c
--- /dev/null
+++ b/daemons/controld/controld_execd.c
@@ -0,0 +1,2433 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <regex.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
+#include <crm/services.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+#define START_DELAY_THRESHOLD (5 * 60 * 1000)
+#define MAX_LRM_REG_FAILS 30
+
+struct delete_event_s {
+ int rc;
+ const char *rsc;
+ lrm_state_t *lrm_state;
+};
+
+static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
+static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
+static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
+
+static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
+ const xmlNode *rsc_op,
+ const char *rsc_id,
+ const char *operation);
+static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
+ xmlNode *msg, struct ra_metadata_s *md);
+
+static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
+ int log_level);
+
+static void
+lrm_connection_destroy(void)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
+ crm_crit("Connection to executor failed");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+
+ } else {
+ crm_info("Disconnected from executor");
+ }
+}
+
+static char *
+make_stop_id(const char *rsc, int call_id)
+{
+ return crm_strdup_printf("%s:%d", rsc, call_id);
+}
+
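+// g_hash_table_foreach() helper: copy parameters that lack the CRM_META "_"
+// marker (that is, instance attributes) into the table passed as user_data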
+static void
+copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
+{
+ if (strstr(key, CRM_META "_") == NULL) {
+ g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
+ }
+}
+
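+// g_hash_table_foreach() helper: copy only parameters carrying the
+// CRM_META "_" marker (meta-attributes) into the table passed as user_data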
+static void
+copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
+{
+ if (strstr(key, CRM_META "_") != NULL) {
+ g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove a recurring operation from a resource's history
+ *
+ * \param[in,out] history Resource history to modify
+ * \param[in] op Operation to remove
+ *
+ * \return TRUE if the operation was found and removed, FALSE otherwise
+ */
+static gboolean
+history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
+{
+ GList *iter;
+
+ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+ lrmd_event_data_t *existing = iter->data;
+
+ if ((op->interval_ms == existing->interval_ms)
+ && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
+ && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
+
+ history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
+ lrmd_free_event(existing);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Free all recurring operations in resource history
+ *
+ * \param[in,out] history Resource history to modify
+ */
+static void
+history_free_recurring_ops(rsc_history_t *history)
+{
+ GList *iter;
+
+ for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+ lrmd_free_event(iter->data);
+ }
+ g_list_free(history->recurring_op_list);
+ history->recurring_op_list = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Free resource history
+ *
+ * \param[in,out] data  Resource history entry (rsc_history_t *) to free
+ */
+void
+history_free(gpointer data)
+{
+ rsc_history_t *history = (rsc_history_t*)data;
+
+ if (history->stop_params) {
+ g_hash_table_destroy(history->stop_params);
+ }
+
+ /* Don't need to free history->rsc.id because it's set to history->id */
+ free(history->rsc.type);
+ free(history->rsc.standard);
+ free(history->rsc.provider);
+
+ lrmd_free_event(history->failed);
+ lrmd_free_event(history->last);
+ free(history->id);
+ history_free_recurring_ops(history);
+ free(history);
+}
+
+static void
+update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
+{
+ int target_rc = 0;
+ rsc_history_t *entry = NULL;
+
+ if (op->rsc_deleted) {
+ crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
+ controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
+ NULL, crmd_cib_smart_opt());
+ return;
+ }
+
+ if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) {
+ return;
+ }
+
+ crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
+ if (entry == NULL && rsc) {
+ entry = calloc(1, sizeof(rsc_history_t));
+ entry->id = strdup(op->rsc_id);
+ g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
+
+ entry->rsc.id = entry->id;
+ entry->rsc.type = strdup(rsc->type);
+ entry->rsc.standard = strdup(rsc->standard);
+ pcmk__str_update(&entry->rsc.provider, rsc->provider);
+
+ } else if (entry == NULL) {
+ crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
+ return;
+ }
+
+ entry->last_callid = op->call_id;
+ target_rc = rsc_op_expected_rc(op);
+ if (op->op_status == PCMK_EXEC_CANCELLED) {
+ if (op->interval_ms > 0) {
+ crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
+ op->rsc_id, op->op_type, op->interval_ms);
+ history_remove_recurring_op(entry, op);
+ return;
+ } else {
+ crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
+ op->rsc_id, op->op_type, op->interval_ms, op->rc,
+ op->op_status);
+ }
+
+ } else if (did_rsc_op_fail(op, target_rc)) {
+ /* Store failed monitors here, otherwise the block below will cause them
+ * to be forgotten when a stop happens.
+ */
+ if (entry->failed) {
+ lrmd_free_event(entry->failed);
+ }
+ entry->failed = lrmd_copy_event(op);
+
+ } else if (op->interval_ms == 0) {
+ if (entry->last) {
+ lrmd_free_event(entry->last);
+ }
+ entry->last = lrmd_copy_event(op);
+
+ if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START,
+ CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT,
+ CRMD_ACTION_STATUS, NULL)) {
+ if (entry->stop_params) {
+ g_hash_table_destroy(entry->stop_params);
+ }
+ entry->stop_params = pcmk__strkey_table(free, free);
+
+ g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
+ }
+ }
+
+ if (op->interval_ms > 0) {
+ /* Ensure there are no duplicates */
+ history_remove_recurring_op(entry, op);
+
+ crm_trace("Adding recurring op: " PCMK__OP_FMT,
+ op->rsc_id, op->op_type, op->interval_ms);
+ entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
+
+ } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) {
+ crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
+ g_list_length(entry->recurring_op_list), op->rsc_id,
+ op->op_type, op->interval_ms);
+ history_free_recurring_ops(entry);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Send a direct OK ack for a resource task
+ *
+ * \param[in] lrm_state LRM connection
+ * \param[in] input Input message being ack'ed
+ * \param[in] rsc_id ID of affected resource
+ * \param[in] rsc Affected resource (if available)
+ * \param[in] task Operation task being ack'ed
+ * \param[in] ack_host Name of host to send ack to
+ * \param[in] ack_sys IPC system name to ack
+ */
+static void
+send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
+ const char *rsc_id, const lrmd_rsc_info_t *rsc,
+ const char *task, const char *ack_host, const char *ack_sys)
+{
+ lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
+
+ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
+ lrmd_free_event(op);
+}
+
+static inline const char *
+op_node_name(lrmd_event_data_t *op)
+{
+ return pcmk__s(op->remote_nodename, controld_globals.our_nodename);
+}
+
+void
+lrm_op_callback(lrmd_event_data_t * op)
+{
+ CRM_CHECK(op != NULL, return);
+ switch (op->type) {
+ case lrmd_event_disconnect:
+ if (op->remote_nodename == NULL) {
+ /* If this is the local executor IPC connection, set the right
+ * bits in the controller when the connection goes down.
+ */
+ lrm_connection_destroy();
+ }
+ break;
+
+ case lrmd_event_exec_complete:
+ {
+ lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
+
+ CRM_ASSERT(lrm_state != NULL);
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+try_local_executor_connect(long long action, fsa_data_t *msg_data,
+ lrm_state_t *lrm_state)
+{
+ int rc = pcmk_rc_ok;
+
+ crm_debug("Connecting to the local executor");
+
+ // If we can connect, great
+ rc = controld_connect_local_executor(lrm_state);
+ if (rc == pcmk_rc_ok) {
+ controld_set_fsa_input_flags(R_LRM_CONNECTED);
+ crm_info("Connection to the local executor established");
+ return;
+ }
+
+ // Otherwise, if we can try again, set a timer to do so
+ if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
+ crm_warn("Failed to connect to the local executor %d time%s "
+ "(%d max): %s", lrm_state->num_lrm_register_fails,
+ pcmk__plural_s(lrm_state->num_lrm_register_fails),
+ MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
+ controld_start_wait_timer();
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+
+ // Otherwise give up
+ crm_err("Failed to connect to the executor the max allowed "
+ "%d time%s: %s", lrm_state->num_lrm_register_fails,
+ pcmk__plural_s(lrm_state->num_lrm_register_fails),
+ pcmk_rc_str(rc));
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+}
+
+/* A_LRM_CONNECT */
+void
+do_lrm_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ /* This only pertains to local executor connections. Remote connections are
+ * handled as resources within the scheduler. Connecting and disconnecting
+ * from remote executor instances is handled differently.
+ */
+
+ lrm_state_t *lrm_state = NULL;
+
+ if (controld_globals.our_nodename == NULL) {
+ return; /* Nothing to do */
+ }
+ lrm_state = lrm_state_find_or_create(controld_globals.our_nodename);
+ if (lrm_state == NULL) {
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ return;
+ }
+
+ if (action & A_LRM_DISCONNECT) {
+ if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
+ if (action == A_LRM_DISCONNECT) {
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+ }
+
+ controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+ crm_info("Disconnecting from the executor");
+ lrm_state_disconnect(lrm_state);
+ lrm_state_reset_tables(lrm_state, FALSE);
+ crm_notice("Disconnected from the executor");
+ }
+
+ if (action & A_LRM_CONNECT) {
+ try_local_executor_connect(action, msg_data, lrm_state);
+ }
+
+ if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
+ crm_err("Unexpected action %s in %s", fsa_action2string(action),
+ __func__);
+ }
+}
+
+static gboolean
+lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
+{
+ int counter = 0;
+ gboolean rc = TRUE;
+ const char *when = "lrm disconnect";
+
+ GHashTableIter gIter;
+ const char *key = NULL;
+ rsc_history_t *entry = NULL;
+ active_op_t *pending = NULL;
+
+ crm_debug("Checking for active resources before exit");
+
+ if (cur_state == S_TERMINATE) {
+ log_level = LOG_ERR;
+ when = "shutdown";
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ when = "shutdown... waiting";
+ }
+
+ if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
+ guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ stop_recurring_actions,
+ lrm_state);
+ guint nremaining = g_hash_table_size(lrm_state->active_ops);
+
+ if (removed || nremaining) {
+ crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
+ removed, pcmk__plural_s(removed), when, nremaining);
+ }
+ }
+
+ if (lrm_state->active_ops != NULL) {
+ g_hash_table_iter_init(&gIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
+ /* Ignore recurring actions in the shutdown calculations */
+ if (pending->interval_ms == 0) {
+ counter++;
+ }
+ }
+ }
+
+ if (counter > 0) {
+ do_crm_log(log_level, "%d pending executor operation%s at %s",
+ counter, pcmk__plural_s(counter), when);
+
+ if ((cur_state == S_TERMINATE)
+ || !pcmk_is_set(controld_globals.fsa_input_register,
+ R_SENT_RSC_STOP)) {
+ g_hash_table_iter_init(&gIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
+ do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
+ }
+
+ } else {
+ rc = FALSE;
+ }
+ return rc;
+ }
+
+ if (lrm_state->resource_history == NULL) {
+ return rc;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ /* At this point we're not waiting, we're just shutting down */
+ when = "shutdown";
+ }
+
+ counter = 0;
+ g_hash_table_iter_init(&gIter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
+ if (is_rsc_active(lrm_state, entry->id) == FALSE) {
+ continue;
+ }
+
+ counter++;
+ if (log_level == LOG_ERR) {
+ crm_info("Found %s active at %s", entry->id, when);
+ } else {
+ crm_trace("Found %s active at %s", entry->id, when);
+ }
+ if (lrm_state->active_ops != NULL) {
+ GHashTableIter hIter;
+
+ g_hash_table_iter_init(&hIter, lrm_state->active_ops);
+ while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
+ if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
+ crm_notice("%sction %s (%s) incomplete at %s",
+ pending->interval_ms == 0 ? "A" : "Recurring a",
+ key, pending->op_key, when);
+ }
+ }
+ }
+ }
+
+ if (counter) {
+ crm_err("%d resource%s active at %s",
+ counter, (counter == 1)? " was" : "s were", when);
+ }
+
+ return rc;
+}
+
+static gboolean
+is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
+{
+ rsc_history_t *entry = NULL;
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ if (entry == NULL || entry->last == NULL) {
+ return FALSE;
+ }
+
+ crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
+ entry->last->interval_ms, entry->last->rc);
+ if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) {
+ return FALSE;
+
+ } else if (entry->last->rc == PCMK_OCF_OK
+ && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
+ // A stricter check is too complex ... leave that to the scheduler
+ return FALSE;
+
+ } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
+ return FALSE;
+
+ } else if ((entry->last->interval_ms == 0)
+ && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
+ /* Badly configured resources can't be reliably stopped */
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static gboolean
+build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
+{
+ GHashTableIter iter;
+ rsc_history_t *entry = NULL;
+
+ g_hash_table_iter_init(&iter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
+
+ GList *gIter = NULL;
+ xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
+
+ crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
+ crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
+ crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
+ crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
+
+ if (entry->last && entry->last->params) {
+ const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
+ if (container) {
+ crm_trace("Resource %s is a part of container resource %s", entry->id, container);
+ crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
+ }
+ }
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
+ lrm_state->node_name);
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
+ lrm_state->node_name);
+ for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
+ controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
+ lrm_state->node_name);
+ }
+ }
+
+ return FALSE;
+}
+
+xmlNode *
+controld_query_executor_state(void)
+{
+ xmlNode *xml_state = NULL;
+ xmlNode *xml_data = NULL;
+ xmlNode *rsc_list = NULL;
+ crm_node_t *peer = NULL;
+ lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename);
+
+ if (!lrm_state) {
+ crm_err("Could not find executor state for node %s",
+ controld_globals.our_nodename);
+ return NULL;
+ }
+
+ peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
+ CRM_CHECK(peer != NULL, return NULL);
+
+ xml_state = create_node_state_update(peer,
+ node_update_cluster|node_update_peer,
+ NULL, __func__);
+ if (xml_state == NULL) {
+ return NULL;
+ }
+
+ xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
+ crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
+ rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
+
+ /* Build a list of active (not always running) resources */
+ build_active_RAs(lrm_state, rsc_list);
+
+ crm_log_xml_trace(xml_state, "Current executor state");
+
+ return xml_state;
+}
+
+/*!
+ * \internal
+ * \brief Map standard Pacemaker return code to operation status and OCF code
+ *
+ * \param[out] event Executor event whose status and return code should be set
+ * \param[in] rc Standard Pacemaker return code
+ */
+void
+controld_rc2event(lrmd_event_data_t *event, int rc)
+{
+ /* This is called for cleanup requests from controller peers/clients, not
+ * for resource actions, so no exit reason is needed.
+ */
+ switch (rc) {
+ case pcmk_rc_ok:
+ lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ break;
+ case EACCES:
+ lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
+ PCMK_EXEC_ERROR, NULL);
+ break;
+ default:
+ lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ NULL);
+ break;
+ }
+}
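+
+/* Illustrative sketch (hypothetical from_host, from_sys, and rsc_id): a
+ * cleanup handler can synthesize an event, map a standard return code onto
+ * it, and acknowledge the requester directly, much as notify_deleted() does
+ * below:
+ *
+ * lrmd_event_data_t *ev = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
+ *
+ * controld_rc2event(ev, pcmk_rc_ok); // PCMK_OCF_OK, PCMK_EXEC_DONE
+ * controld_ack_event_directly(from_host, from_sys, NULL, ev, rsc_id);
+ * lrmd_free_event(ev);
+ */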
+
+/*!
+ * \internal
+ * \brief Trigger a new transition after CIB status was deleted
+ *
+ * If a CIB status delete was not expected (as part of the transition graph),
+ * trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
+ * cluster property.
+ *
+ * \param[in] from_sys IPC name that requested the delete
+ * \param[in] rsc_id Resource whose status was deleted (for logging only)
+ */
+void
+controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
+{
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
+ char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
+
+ crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_none,
+ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
+ "last-lrm-refresh", now_s, NULL, NULL);
+ free(now_s);
+ }
+}
+
+static void
+notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
+ const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
+
+ crm_info("Notifying %s on %s that %s was%s deleted",
+ from_sys, (from_host? from_host : "localhost"), rsc_id,
+ ((rc == pcmk_ok)? "" : " not"));
+ op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
+ controld_rc2event(op, pcmk_legacy2rc(rc));
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+ lrmd_free_event(op);
+ controld_trigger_delete_refresh(from_sys, rsc_id);
+}
+
+static gboolean
+lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
+{
+ struct delete_event_s *event = user_data;
+ struct pending_deletion_op_s *op = value;
+
+ if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
+ notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static gboolean
+lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
+{
+ const char *rsc = user_data;
+ active_op_t *pending = value;
+
+ if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
+ crm_info("Removing op %s:%d for deleted resource %s",
+ pending->op_key, pending->call_id, rsc);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static void
+delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
+ const char *rsc_id, GHashTableIter *rsc_iter, int rc,
+ const char *user_name, bool from_cib)
+{
+ struct delete_event_s event;
+
+ CRM_CHECK(rsc_id != NULL, return);
+
+ if (rc == pcmk_ok) {
+ char *rsc_id_copy = strdup(rsc_id);
+
+ if (rsc_iter) {
+ g_hash_table_iter_remove(rsc_iter);
+ } else {
+ g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
+ }
+
+ if (from_cib) {
+ controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
+ user_name, crmd_cib_smart_opt());
+ }
+ g_hash_table_foreach_remove(lrm_state->active_ops,
+ lrm_remove_deleted_op, rsc_id_copy);
+ free(rsc_id_copy);
+ }
+
+ if (input) {
+ notify_deleted(lrm_state, input, rsc_id, rc);
+ }
+
+ event.rc = rc;
+ event.rsc = rsc_id;
+ event.lrm_state = lrm_state;
+ g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
+}
+
+static inline gboolean
+last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
+{
+ if ((entry == NULL) || (entry->failed == NULL)) {
+ // Nothing failed has been recorded, so nothing can match
+ return FALSE;
+ }
+ if (op == NULL) {
+ return TRUE;
+ }
+ return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
+ && (interval_ms == entry->failed->interval_ms));
+}
+
+/*!
+ * \internal
+ * \brief Clear a resource's last failure
+ *
+ * Erase a resource's last failure on a particular node from both the
+ * LRM resource history in the CIB, and the resource history remembered
+ * for the LRM state.
+ *
+ * \param[in] rsc_id Resource name
+ * \param[in] node_name Node name
+ * \param[in] operation If specified, only clear if matching this operation
+ * \param[in] interval_ms If operation is specified, it has this interval
+ */
+void
+lrm_clear_last_failure(const char *rsc_id, const char *node_name,
+ const char *operation, guint interval_ms)
+{
+ lrm_state_t *lrm_state = lrm_state_find(node_name);
+
+ if (lrm_state == NULL) {
+ return;
+ }
+ if (lrm_state->resource_history != NULL) {
+ rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
+ rsc_id);
+
+ if (last_failed_matches_op(entry, operation, interval_ms)) {
+ lrmd_free_event(entry->failed);
+ entry->failed = NULL;
+ }
+ }
+}
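+
+/* Illustrative sketch (hypothetical resource name and interval): clearing the
+ * last failure of a 10-second monitor of "rsc1" on the local node would be
+ *
+ * lrm_clear_last_failure("rsc1", controld_globals.our_nodename,
+ * CRMD_ACTION_STATUS, 10000);
+ *
+ * Passing NULL as the operation clears the last failure regardless of which
+ * operation failed (see last_failed_matches_op() above).
+ */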
+
+/* Returns: gboolean - cancellation is in progress */
+static gboolean
+cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
+{
+ int rc = pcmk_ok;
+ char *local_key = NULL;
+ active_op_t *pending = NULL;
+
+ CRM_CHECK(op != 0, return FALSE);
+ CRM_CHECK(rsc_id != NULL, return FALSE);
+ if (key == NULL) {
+ local_key = make_stop_id(rsc_id, op);
+ key = local_key;
+ }
+ pending = g_hash_table_lookup(lrm_state->active_ops, key);
+
+ if (pending) {
+ if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
+ controld_set_active_op_flags(pending, active_op_remove);
+ crm_debug("Scheduling %s for removal", key);
+ }
+
+ if (pcmk_is_set(pending->flags, active_op_cancelled)) {
+ crm_debug("Operation %s already cancelled", key);
+ free(local_key);
+ return FALSE;
+ }
+ controld_set_active_op_flags(pending, active_op_cancelled);
+
+ } else {
+ crm_info("No pending op found for %s", key);
+ free(local_key);
+ return FALSE;
+ }
+
+ crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
+ rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
+ pending->interval_ms);
+ if (rc == pcmk_ok) {
+ crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
+ free(local_key);
+ return TRUE;
+ }
+
+ crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
+ /* The caller needs to make sure the entry is removed from the active
+ * operations list, usually by returning TRUE from the worker function
+ * supplied to g_hash_table_foreach_remove(). Failing to remove the entry
+ * from the active operations list will block the node from shutting down.
+ */
+ free(local_key);
+ return FALSE;
+}
+
+struct cancel_data {
+ gboolean done;
+ gboolean remove;
+ const char *key;
+ lrmd_rsc_info_t *rsc;
+ lrm_state_t *lrm_state;
+};
+
+static gboolean
+cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ struct cancel_data *data = user_data;
+ active_op_t *op = value;
+
+ if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
+ data->done = TRUE;
+ remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
+ }
+ return remove;
+}
+
+static gboolean
+cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
+{
+ guint removed = 0;
+ struct cancel_data data;
+
+ CRM_CHECK(rsc != NULL, return FALSE);
+ CRM_CHECK(key != NULL, return FALSE);
+
+ data.key = key;
+ data.rsc = rsc;
+ data.done = FALSE;
+ data.remove = remove;
+ data.lrm_state = lrm_state;
+
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ cancel_action_by_key, &data);
+ crm_trace("Removed %u op cache entries, new size: %u",
+ removed, g_hash_table_size(lrm_state->active_ops));
+ return data.done;
+}
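+
+/* For reference (assuming the usual "<rsc>_<action>_<interval>" key format
+ * produced by pcmk__op_key()), cancelling a 10-second monitor of a
+ * hypothetical resource "rsc1" would look like
+ *
+ * cancel_op_key(lrm_state, rsc, "rsc1_monitor_10000", TRUE);
+ *
+ * where TRUE additionally schedules the operation's history entry for
+ * removal once the cancellation completes.
+ */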
+
+/*!
+ * \internal
+ * \brief Retrieve resource information from LRM
+ *
+ * \param[in,out] lrm_state Executor connection state to use
+ * \param[in] rsc_xml XML containing resource configuration
+ * \param[in] do_create If true, register resource if not already
+ * \param[out] rsc_info Where to store information obtained from executor
+ *
+ * \retval pcmk_ok Success (and rsc_info holds newly allocated result)
+ * \retval -EINVAL Required information is missing from arguments
+ * \retval -ENOTCONN No active connection to LRM
+ * \retval -ENODEV Resource not found
+ * \retval -errno Error communicating with executor when registering resource
+ *
+ * \note Caller is responsible for freeing result on success.
+ */
+static int
+get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
+ gboolean do_create, lrmd_rsc_info_t **rsc_info)
+{
+ const char *id = ID(rsc_xml);
+
+ CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
+ CRM_CHECK(id, return -EINVAL);
+
+ if (lrm_state_is_connected(lrm_state) == FALSE) {
+ return -ENOTCONN;
+ }
+
+ crm_trace("Retrieving resource information for %s from the executor", id);
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+
+ // If resource isn't known by ID, try clone name, if provided
+ if (!*rsc_info) {
+ const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
+
+ if (long_id) {
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
+ }
+ }
+
+ if ((*rsc_info == NULL) && do_create) {
+ const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
+ const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
+ const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
+ int rc;
+
+ crm_trace("Registering resource %s with the executor", id);
+ rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
+ lrmd_opt_drop_recurring);
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Could not register resource %s with the executor on %s: %s "
+ CRM_XS " rc=%d",
+ id, lrm_state->node_name, pcmk_strerror(rc), rc);
+
+ /* Register this as an internal error if this involves the local
+ * executor. Otherwise, we're likely dealing with an unresponsive
+ * remote node, which is not an FSA failure.
+ */
+ if (lrm_state_is_local(lrm_state) == TRUE) {
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ return rc;
+ }
+
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+ }
+ return *rsc_info? pcmk_ok : -ENODEV;
+}
+
+static void
+delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
+ GHashTableIter *iter, const char *sys, const char *user,
+ ha_msg_input_t *request, bool unregister, bool from_cib)
+{
+ int rc = pcmk_ok;
+
+ crm_info("Removing resource %s from executor for %s%s%s",
+ id, sys, (user? " as " : ""), (user? user : ""));
+
+ if (rsc && unregister) {
+ rc = lrm_state_unregister_rsc(lrm_state, id, 0);
+ }
+
+ if (rc == pcmk_ok) {
+ crm_trace("Resource %s deleted from executor", id);
+ } else if (rc == -EINPROGRESS) {
+ crm_info("Deletion of resource '%s' from executor is pending", id);
+ if (request) {
+ struct pending_deletion_op_s *op = NULL;
+ char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
+
+ op = calloc(1, sizeof(struct pending_deletion_op_s));
+ op->rsc = strdup(rsc->id);
+ op->input = copy_ha_msg_input(request);
+ g_hash_table_insert(lrm_state->deletion_ops, ref, op);
+ }
+ return;
+ } else {
+ crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
+ CRM_XS " rc=%d", id, sys, (user? " as " : ""),
+ (user? user : ""), pcmk_strerror(rc), rc);
+ }
+
+ delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
+}
+
+static int
+get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
+{
+ int call_id = 999999999;
+ rsc_history_t *entry = NULL;
+
+ if (lrm_state) {
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ }
+
+ /* Make sure the call ID is greater than that of the last successful
+ * operation; otherwise the failure cannot trigger recovery of the
+ * resource, because the failure could appear to have occurred before
+ * the successful start.
+ */
+ if (entry) {
+ call_id = entry->last_callid + 1;
+ }
+
+ if (call_id < 0) {
+ call_id = 1;
+ }
+ return call_id;
+}
+
+static void
+fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
+ enum ocf_exitcode op_exitcode, const char *exit_reason)
+{
+ op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
+ op->t_run = time(NULL);
+ op->t_rcchange = op->t_run;
+ lrmd__set_result(op, op_exitcode, op_status, exit_reason);
+}
+
+static void
+force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
+ const char *from_host, const char *user_name,
+ gboolean is_remote_node, bool reprobe_all_nodes)
+{
+ GHashTableIter gIter;
+ rsc_history_t *entry = NULL;
+
+ crm_info("Clearing resource history on node %s", lrm_state->node_name);
+ g_hash_table_iter_init(&gIter, lrm_state->resource_history);
+ while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+ /* Only unregister the resource during a reprobe if it is not a remote
+ * connection resource. Otherwise, unregistering the connection would
+ * terminate the remote node's membership.
+ */
+ bool unregister = true;
+
+ if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
+ unregister = false;
+
+ if (reprobe_all_nodes) {
+ lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
+
+ if (remote_lrm_state != NULL) {
+ /* If reprobing all nodes, be sure to reprobe the remote
+ * node before clearing its connection resource
+ */
+ force_reprobe(remote_lrm_state, from_sys, from_host,
+ user_name, TRUE, reprobe_all_nodes);
+ }
+ }
+ }
+
+ /* Don't delete from the CIB, since we'll delete the whole node's LRM
+ * state from the CIB soon
+ */
+ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
+ user_name, NULL, unregister, false);
+ }
+
+ /* Now delete the copy in the CIB */
+ controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
+ cib_scope_local);
+
+ // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false)
+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
+}
+
+/*!
+ * \internal
+ * \brief Fail a requested action without actually executing it
+ *
+ * For an action that can't be executed, process it similarly to an actual
+ * execution result, with specified error status (except for notify actions,
+ * which will always be treated as successful).
+ *
+ * \param[in,out] lrm_state Executor connection that action is for
+ * \param[in] action Action XML from request
+ * \param[in] op_status Desired operation status to use
+ * \param[in] rc Desired return code to use
+ * \param[in] exit_reason Human-friendly detail, if error
+ */
+static void
+synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
+ int op_status, enum ocf_exitcode rc,
+ const char *exit_reason)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
+ const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
+ xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
+
+ if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
+ /* @TODO Should we do something else, like direct ack? */
+ crm_info("Can't fake %s failure (%d) on %s without resource configuration",
+ crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
+ target_node);
+ return;
+
+ } else if (operation == NULL) {
+ /* This probably came from crm_resource -C, nothing to do */
+ crm_info("Can't fake %s failure (%d) on %s without operation",
+ ID(xml_rsc), rc, target_node);
+ return;
+ }
+
+ op = construct_op(lrm_state, action, ID(xml_rsc), operation);
+
+ if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail
+ fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
+ } else {
+ fake_op_status(lrm_state, op, op_status, rc, exit_reason);
+ }
+
+ crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
+ op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
+
+ // Process the result as if it came from the LRM
+ process_lrm_event(lrm_state, op, NULL, action);
+ lrmd_free_event(op);
+}
+
+/*!
+ * \internal
+ * \brief Get target of an LRM operation (replacing \p NULL with local node
+ * name)
+ *
+ * \param[in] xml LRM operation data XML
+ *
+ * \return LRM operation target node name (local node or Pacemaker Remote node)
+ */
+static const char *
+lrm_op_target(const xmlNode *xml)
+{
+ const char *target = NULL;
+
+ if (xml) {
+ target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
+ }
+ if (target == NULL) {
+ target = controld_globals.our_nodename;
+ }
+ return target;
+}
+
+static void
+fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
+ const char *from_host, const char *from_sys)
+{
+ lrmd_event_data_t *op = NULL;
+ lrmd_rsc_info_t *rsc = NULL;
+ xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
+
+ CRM_CHECK(xml_rsc != NULL, return);
+
+ /* The executor simply executes operations and reports the results, without
+ * any concept of success or failure, so to fail a resource, we must fake
+ * what a failure looks like.
+ *
+ * To do this, we create a fake executor operation event for the resource,
+ * and pass that event to the executor client callback so it will be
+ * processed as if it came from the executor.
+ */
+ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
+
+ free((char*) op->user_data);
+ op->user_data = NULL;
+ op->interval_ms = 0;
+
+ if (user_name && !pcmk__is_privileged(user_name)) {
+ crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
+ fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
+ PCMK_OCF_INSUFFICIENT_PRIV,
+ "Unprivileged user cannot fail resources");
+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
+ lrmd_free_event(op);
+ return;
+ }
+
+ if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
+ crm_info("Failing resource %s...", rsc->id);
+ fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
+ "Simulated failure");
+ process_lrm_event(lrm_state, op, NULL, xml);
+ op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
+ lrmd_free_rsc_info(rsc);
+
+ } else {
+ crm_info("Cannot find/create resource in order to fail it...");
+ crm_log_xml_warn(xml, "bad input");
+ fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
+ "Cannot fail unknown resource");
+ }
+
+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
+ lrmd_free_event(op);
+}
+
+static void
+handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
+ const char *from_host, const char *user_name,
+ gboolean is_remote_node, bool reprobe_all_nodes)
+{
+ crm_notice("Forcing the status of all resources to be redetected");
+ force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
+ reprobe_all_nodes);
+
+ if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
+
+ xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
+ from_sys, CRM_SYSTEM_LRMD,
+ controld_globals.our_uuid);
+
+ crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
+
+ if (relay_message(reply, TRUE) == FALSE) {
+ crm_log_xml_err(reply, "Unable to route reply");
+ }
+ free_xml(reply);
+ }
+}
+
+static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
+ lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
+{
+ char *op_key = NULL;
+ char *meta_key = NULL;
+ int call = 0;
+ const char *call_id = NULL;
+ const char *op_task = NULL;
+ guint interval_ms = 0;
+ gboolean in_progress = FALSE;
+ xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
+
+ CRM_CHECK(params != NULL, return FALSE);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
+ op_task = crm_element_value(params, meta_key);
+ free(meta_key);
+ CRM_CHECK(op_task != NULL, return FALSE);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
+ if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
+ free(meta_key);
+ return FALSE;
+ }
+ free(meta_key);
+
+ op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
+
+ meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
+ call_id = crm_element_value(params, meta_key);
+ free(meta_key);
+
+ crm_debug("Scheduler requested op %s (call=%s) be cancelled",
+ op_key, (call_id? call_id : "NA"));
+ pcmk__scan_min_int(call_id, &call, 0);
+ if (call == 0) {
+ // Normal case when the scheduler cancels a recurring op
+ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
+
+ } else {
+ // Normal case when the scheduler cancels an orphan op
+ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
+ }
+
+ // Ack now if no cancellation is in progress or this is a remote connection resource
+ if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+ char *op_id = make_stop_id(rsc->id, call);
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
+ crm_info("Nothing known about operation %d for %s", call, op_key);
+ }
+ controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
+ op_key, call);
+ send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
+ from_host, from_sys);
+
+ /* needed at least for cancellation of a remote operation */
+ if (lrm_state->active_ops != NULL) {
+ g_hash_table_remove(lrm_state->active_ops, op_id);
+ }
+ free(op_id);
+
+ } else {
+ /* No ack is needed since abcdaa8, but peers with older versions
+ * in a rolling upgrade need one. We didn't bump the feature set
+ * at that commit, so we can only compare against the previous
+ * CRM version (3.0.8). If any peers have feature set 3.0.9 but
+ * not abcdaa8, they will time out waiting for the ack (no
+ * released versions of Pacemaker are affected).
+ */
+ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
+
+ if (compare_version(peer_version, "3.0.8") <= 0) {
+ crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
+ op_key, from_host, peer_version);
+ send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
+ from_host, from_sys);
+ }
+ }
+
+ free(op_key);
+ return TRUE;
+}
+
+static void
+do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
+ lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
+ bool crm_rsc_delete, const char *user_name)
+{
+ bool unregister = true;
+ int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
+ user_name,
+ cib_dryrun|cib_sync_call);
+
+ if (cib_rc != pcmk_rc_ok) {
+ lrmd_event_data_t *op = NULL;
+
+ op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
+
+ /* These are resource clean-ups, not actions, so no exit reason is
+ * needed.
+ */
+ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
+ lrmd_free_event(op);
+ return;
+ }
+
+ if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+ unregister = false;
+ }
+
+ delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
+ user_name, input, unregister, true);
+}
+
+// User data for asynchronous metadata execution
+struct metadata_cb_data {
+ lrmd_rsc_info_t *rsc; // Copy of resource information
+ xmlNode *input_xml; // Copy of FSA input XML
+};
+
+static struct metadata_cb_data *
+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
+{
+ struct metadata_cb_data *data = NULL;
+
+ data = calloc(1, sizeof(struct metadata_cb_data));
+ CRM_ASSERT(data != NULL);
+ data->input_xml = copy_xml(input_xml);
+ data->rsc = lrmd_copy_rsc_info(rsc);
+ return data;
+}
+
+static void
+free_metadata_cb_data(struct metadata_cb_data *data)
+{
+ lrmd_free_rsc_info(data->rsc);
+ free_xml(data->input_xml);
+ free(data);
+}
+
+/*!
+ * \internal
+ * \brief Execute an action after metadata has been retrieved
+ *
+ * \param[in] pid Ignored
+ * \param[in] result Result of metadata action
+ * \param[in] user_data Metadata callback data
+ */
+static void
+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
+{
+ struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
+
+ struct ra_metadata_s *md = NULL;
+ lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
+
+ if ((lrm_state != NULL) && pcmk__result_ok(result)) {
+ md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
+ result->action_stdout);
+ }
+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
+ free_metadata_cb_data(data);
+}
+
+/* A_LRM_INVOKE */
+void
+do_lrm_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ lrm_state_t *lrm_state = NULL;
+ const char *crm_op = NULL;
+ const char *from_sys = NULL;
+ const char *from_host = NULL;
+ const char *operation = NULL;
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *user_name = NULL;
+ const char *target_node = lrm_op_target(input->xml);
+ gboolean is_remote_node = FALSE;
+ bool crm_rsc_delete = FALSE;
+
+ // A message routed here that targets another node must be for a Pacemaker Remote node
+ is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename,
+ pcmk__str_casei);
+
+ lrm_state = lrm_state_find(target_node);
+ if ((lrm_state == NULL) && is_remote_node) {
+ crm_err("Failing action because local node has never had connection to remote node %s",
+ target_node);
+ synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "Local node has no connection to remote");
+ return;
+ }
+ CRM_ASSERT(lrm_state != NULL);
+
+ user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL);
+ crm_op = crm_element_value(input->msg, F_CRM_TASK);
+ from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
+ }
+
+ if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) {
+ if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ crm_rsc_delete = TRUE; // from crm_resource
+ }
+ operation = CRMD_ACTION_DELETE;
+
+ } else if (input->xml != NULL) {
+ operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
+ }
+
+ CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
+
+ crm_trace("'%s' execution request from %s as %s user",
+ pcmk__s(crm_op, operation),
+ pcmk__s(from_sys, "unknown subsystem"),
+ pcmk__s(user_name, "current"));
+
+ if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
+ fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
+ from_sys);
+
+ } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) {
+ /* @COMPAT This can only be sent by crm_resource --refresh on a
+ * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
+ * unlikely. It previously would cause the controller to re-write its
+ * resource history to the CIB. Just ignore it.
+ */
+ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
+
+ // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
+ } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) {
+ update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
+ user_name, is_remote_node);
+
+ } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
+ || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
+ const char *raw_target = NULL;
+
+ if (input->xml != NULL) {
+ // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
+ raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
+ }
+ handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
+ is_remote_node, (raw_target == NULL));
+
+ } else if (operation != NULL) {
+ lrmd_rsc_info_t *rsc = NULL;
+ xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
+ gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE,
+ pcmk__str_none);
+ int rc;
+
+ // We can't return anything meaningful without a resource ID
+ CRM_CHECK(xml_rsc && ID(xml_rsc), return);
+
+ rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
+ if (rc == -ENOTCONN) {
+ synthesize_lrmd_failure(lrm_state, input->xml,
+ PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "Not connected to remote executor");
+ return;
+
+ } else if ((rc < 0) && !create_rsc) {
+ /* Delete of malformed or nonexistent resource
+ * (deleting something that does not exist is a success)
+ */
+ crm_notice("Not registering resource '%s' for a %s event "
+ CRM_XS " get-rc=%d (%s) transition-key=%s",
+ ID(xml_rsc), operation,
+ rc, pcmk_strerror(rc), ID(input->xml));
+ delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
+ user_name, true);
+ return;
+
+ } else if (rc == -EINVAL) {
+ // Resource operation on malformed resource
+ crm_err("Invalid resource definition for %s", ID(xml_rsc));
+ crm_log_xml_warn(input->msg, "invalid resource");
+ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
+ PCMK_OCF_NOT_CONFIGURED, // fatal error
+ "Invalid resource definition");
+ return;
+
+ } else if (rc < 0) {
+ // Error communicating with the executor
+ crm_err("Could not register resource '%s' with executor: %s "
+ CRM_XS " rc=%d",
+ ID(xml_rsc), pcmk_strerror(rc), rc);
+ crm_log_xml_warn(input->msg, "failed registration");
+ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
+ PCMK_OCF_INVALID_PARAM, // hard error
+ "Could not register resource with executor");
+ return;
+ }
+
+ if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) {
+ if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
+ crm_log_xml_warn(input->xml, "Bad command");
+ }
+
+ } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) {
+ do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
+ crm_rsc_delete, user_name);
+
+ } else {
+ struct ra_metadata_s *md = NULL;
+
+ /* Getting metadata from cache is OK except for start actions --
+ * always refresh from the agent for those, in case the resource
+ * agent was updated.
+ *
+ * @TODO Only refresh metadata for starts if the agent actually
+ * changed (using something like inotify, or a hash or modification
+ * time of the agent executable).
+ */
+ if (strcmp(operation, CRMD_ACTION_START) != 0) {
+ md = controld_get_rsc_metadata(lrm_state, rsc,
+ controld_metadata_from_cache);
+ }
+
+ if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
+ operation)) {
+ /* Most likely, we'll need the agent metadata to record the
+ * pending operation and the operation result. Get it now rather
+ * than wait until then, so the metadata action doesn't eat into
+ * the real action's timeout.
+ *
+ * @TODO Metadata is retrieved via direct execution of the
+ * agent, which has a couple of related issues: the executor
+ * should execute agents, not the controller; and metadata for
+ * Pacemaker Remote nodes should be collected on those nodes,
+ * not locally.
+ */
+ struct metadata_cb_data *data = NULL;
+
+ data = new_metadata_cb_data(rsc, input->xml);
+ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ (void) lrmd__metadata_async(rsc, metadata_complete,
+ (void *) data);
+ } else {
+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
+ }
+ }
+
+ lrmd_free_rsc_info(rsc);
+
+ } else {
+ crm_err("Invalid execution request: unknown command '%s' (bug?)",
+ crm_op);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+static lrmd_event_data_t *
+construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
+ const char *rsc_id, const char *operation)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *op_delay = NULL;
+ const char *op_timeout = NULL;
+ GHashTable *params = NULL;
+
+ xmlNode *primitive = NULL;
+ const char *class = NULL;
+
+ const char *transition = NULL;
+
+ CRM_ASSERT(rsc_id && operation);
+
+ op = lrmd_new_event(rsc_id, operation, 0);
+ op->type = lrmd_event_exec_complete;
+ op->timeout = 0;
+ op->start_delay = 0;
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+ if (rsc_op == NULL) {
+ CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei));
+ op->user_data = NULL;
+ /* This is the stop_all_resources() case: by definition there is no DC
+ * (or it would be shutting us down), so we put our own version here.
+ */
+ op->params = pcmk__strkey_table(free, free);
+
+ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
+
+ crm_trace("Constructed %s op for %s", operation, rsc_id);
+ return op;
+ }
+
+ params = xml2list(rsc_op);
+ g_hash_table_remove(params, CRM_META "_op_target_rc");
+
+ op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
+ pcmk__scan_min_int(op_delay, &op->start_delay, 0);
+
+ op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
+ pcmk__scan_min_int(op_timeout, &op->timeout, 0);
+
+ if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0,
+ &(op->interval_ms)) != pcmk_rc_ok) {
+ op->interval_ms = 0;
+ }
+
+ /* Use pcmk_monitor_timeout instead of meta timeout for stonith
+ recurring monitor, if set */
+ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE);
+ class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS);
+
+ if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
+ && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei)
+ && (op->interval_ms > 0)) {
+
+ op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
+ if (op_timeout != NULL) {
+ op->timeout = crm_get_msec(op_timeout);
+ }
+ }
+
+ if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) {
+ op->params = params;
+
+ } else {
+ rsc_history_t *entry = NULL;
+
+ if (lrm_state) {
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ }
+
+ /* If we do not have stop parameters cached, use
+ * whatever we are given */
+ if (!entry || !entry->stop_params) {
+ op->params = params;
+ } else {
+ /* Copy the cached parameter list so that we stop the resource
+ * with the old attributes, not the new ones */
+ op->params = pcmk__strkey_table(free, free);
+
+ g_hash_table_foreach(params, copy_meta_keys, op->params);
+ g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
+ g_hash_table_destroy(params);
+ params = NULL;
+ }
+ }
+
+ /* Sanity-check the timeout and start delay */
+ if (op->timeout <= 0) {
+ op->timeout = op->interval_ms;
+ }
+ if (op->start_delay < 0) {
+ op->start_delay = 0;
+ }
+
+ transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
+ CRM_CHECK(transition != NULL, return op);
+
+ op->user_data = strdup(transition);
+
+ if (op->interval_ms != 0) {
+ if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) {
+ crm_err("Start and Stop actions cannot have an interval: %u",
+ op->interval_ms);
+ op->interval_ms = 0;
+ }
+ }
+
+ crm_trace("Constructed %s op for %s: interval=%u",
+ operation, rsc_id, op->interval_ms);
+
+ return op;
+}
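+
+/* Illustrative sketch of the rsc_op == NULL branch above (hypothetical
+ * resource name): during stop_all_resources(), a stop can be constructed
+ * without action XML, in which case the op carries only our feature set as a
+ * parameter:
+ *
+ * lrmd_event_data_t *stop = construct_op(lrm_state, NULL, "rsc1",
+ * CRMD_ACTION_STOP);
+ * // stop->params maps XML_ATTR_CRM_VERSION to CRM_FEATURE_SET
+ */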
+
+/*!
+ * \internal
+ * \brief Send a (synthesized) event result
+ *
+ * Reply with a synthesized event result directly, as opposed to going through
+ * the executor.
+ *
+ * \param[in] to_host Host to send result to
+ * \param[in] to_sys IPC name to send result (NULL for transition engine)
+ * \param[in] rsc Type information about resource the result is for
+ * \param[in,out] op Event with result to send
+ * \param[in] rsc_id ID of resource the result is for
+ */
+void
+controld_ack_event_directly(const char *to_host, const char *to_sys,
+ const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
+ const char *rsc_id)
+{
+ xmlNode *reply = NULL;
+ xmlNode *update, *iter;
+ crm_node_t *peer = NULL;
+
+ CRM_CHECK(op != NULL, return);
+ if (op->rsc_id == NULL) {
+ CRM_ASSERT(rsc_id != NULL);
+ op->rsc_id = strdup(rsc_id);
+ }
+ if (to_sys == NULL) {
+ to_sys = CRM_SYSTEM_TENGINE;
+ }
+
+ peer = crm_get_peer(0, controld_globals.our_nodename);
+ update = create_node_state_update(peer, node_update_none, NULL,
+ __func__);
+
+ iter = create_xml_node(update, XML_CIB_TAG_LRM);
+ crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid);
+ iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
+ iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
+
+ crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
+
+ controld_add_resource_history_xml(iter, rsc, op,
+ controld_globals.our_nodename);
+ reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
+
+ crm_log_xml_trace(update, "[direct ACK]");
+
+ crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
+ op->rsc_id, op->op_type, op->interval_ms, op->user_data,
+ crm_element_value(reply, XML_ATTR_REFERENCE));
+
+ if (relay_message(reply, TRUE) == FALSE) {
+ crm_log_xml_err(reply, "Unable to route reply");
+ }
+
+ free_xml(update);
+ free_xml(reply);
+}
+
+gboolean
+verify_stopped(enum crmd_fsa_state cur_state, int log_level)
+{
+ gboolean res = TRUE;
+ GList *lrm_state_list = lrm_state_get_list();
+ GList *state_entry;
+
+ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
+ lrm_state_t *lrm_state = state_entry->data;
+
+ if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
+ /* keep iterating through all even when false is returned */
+ res = FALSE;
+ }
+ }
+
+ controld_set_fsa_input_flags(R_SENT_RSC_STOP);
+ g_list_free(lrm_state_list); lrm_state_list = NULL;
+ return res;
+}
+
+struct stop_recurring_action_s {
+ lrmd_rsc_info_t *rsc;
+ lrm_state_t *lrm_state;
+};
+
+static gboolean
+stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ struct stop_recurring_action_s *event = user_data;
+ active_op_t *op = value;
+
+ if ((op->interval_ms != 0)
+ && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
+
+ crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
+ remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
+ }
+
+ return remove;
+}
+
+static gboolean
+stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
+{
+ gboolean remove = FALSE;
+ lrm_state_t *lrm_state = user_data;
+ active_op_t *op = value;
+
+ if (op->interval_ms != 0) {
+ crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
+ (const char *) key);
+ remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
+ }
+
+ return remove;
+}
+
+/*!
+ * \internal
+ * \brief Check whether recurring actions should be cancelled before an action
+ *
+ * \param[in] rsc_id Resource that action is for
+ * \param[in] action Action being performed
+ * \param[in] interval_ms Operation interval of \p action (in milliseconds)
+ *
+ * \return true if recurring actions should be cancelled, otherwise false
+ */
+static bool
+should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
+{
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
+ && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) {
+ /* Don't stop monitoring a migrating Pacemaker Remote connection
+ * resource until the entire migration has completed. We must detect if
+ * the connection is unexpectedly severed, even during a migration.
+ */
+ return false;
+ }
+
+ // Cancel recurring actions before changing resource state
+ return (interval_ms == 0)
+ && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY,
+ NULL);
+}
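+
+/* For example, given the logic above: a start (interval 0, not a probe)
+ * returns true, so recurring monitors are cancelled first; a probe (a
+ * monitor with interval 0) or a notify returns false; and a migrate_to of a
+ * Pacemaker Remote connection resource returns false, so that connection
+ * monitoring continues throughout the migration.
+ */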
+
+/*!
+ * \internal
+ * \brief Check whether an action should not be performed at this time
+ *
+ * \param[in] operation Action to be performed
+ *
+ * \return Readable description of why action should not be performed,
+ * or NULL if it should be performed
+ */
+static const char *
+should_nack_action(const char *action)
+{
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
+ && pcmk__str_eq(action, RSC_START, pcmk__str_none)) {
+
+ register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+ return "Not attempting start due to shutdown in progress";
+ }
+
+ switch (controld_globals.fsa_state) {
+ case S_NOT_DC:
+ case S_POLICY_ENGINE: // Recalculating
+ case S_TRANSITION_ENGINE:
+ break;
+ default:
+ if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) {
+ return "Controller cannot attempt actions at this time";
+ }
+ break;
+ }
+ return NULL;
+}
+
+static void
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
+ struct ra_metadata_s *md)
+{
+ int rc;
+ int call_id = 0;
+ char *op_id = NULL;
+ lrmd_event_data_t *op = NULL;
+ fsa_data_t *msg_data = NULL;
+ const char *transition = NULL;
+ const char *operation = NULL;
+ const char *nack_reason = NULL;
+
+ CRM_CHECK((rsc != NULL) && (msg != NULL), return);
+
+ operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
+ CRM_CHECK(!pcmk__str_empty(operation), return);
+
+ transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
+ if (pcmk__str_empty(transition)) {
+ crm_log_xml_err(msg, "Missing transition number");
+ }
+
+ if (lrm_state == NULL) {
+ // This shouldn't be possible, but provide a failsafe just in case
+ crm_err("Cannot execute %s of %s: No executor connection "
+ CRM_XS " transition_key=%s",
+ operation, rsc->id, pcmk__s(transition, ""));
+ synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
+ PCMK_OCF_UNKNOWN_ERROR,
+ "No executor connection");
+ return;
+ }
+
+ if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT, NULL)) {
+ /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
+ * will schedule reload-agent actions only. In either case, we need
+ * to map that to whatever the resource agent actually supports.
+ * Default to the OCF 1.1 name.
+ */
+ if ((md != NULL)
+ && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
+ operation = CRMD_ACTION_RELOAD;
+ } else {
+ operation = CRMD_ACTION_RELOAD_AGENT;
+ }
+ }
+
+ op = construct_op(lrm_state, msg, rsc->id, operation);
+ CRM_CHECK(op != NULL, return);
+
+ if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
+ guint removed = 0;
+ struct stop_recurring_action_s data;
+
+ data.rsc = rsc;
+ data.lrm_state = lrm_state;
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ stop_recurring_action_by_rsc,
+ &data);
+
+ if (removed) {
+ crm_debug("Stopped %u recurring operation%s in preparation for "
+ PCMK__OP_FMT, removed, pcmk__plural_s(removed),
+ rsc->id, operation, op->interval_ms);
+ }
+ }
+
+ /* now do the op */
+ crm_notice("Requesting local execution of %s operation for %s on %s "
+ CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
+ crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
+ pcmk__s(transition, ""), rsc->id, operation, op->interval_ms);
+
+ nack_reason = should_nack_action(operation);
+ if (nack_reason != NULL) {
+ crm_notice("Discarding attempt to perform action %s on %s in state %s "
+ "(shutdown=%s)", operation, rsc->id,
+ fsa_state2string(controld_globals.fsa_state),
+ pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN)));
+
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
+ nack_reason);
+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
+ lrmd_free_event(op);
+ free(op_id);
+ return;
+ }
+
+ controld_record_pending_op(lrm_state->node_name, rsc, op);
+
+ op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
+
+ if (op->interval_ms > 0) {
+ /* cancel it so we can then restart it without conflict */
+ cancel_op_key(lrm_state, rsc, op_id, FALSE);
+ }
+
+ rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
+ op->user_data, op->interval_ms,
+ op->timeout, op->start_delay,
+ op->params, &call_id);
+ if (rc == pcmk_rc_ok) {
+ /* record all operations so we can wait
+ * for them to complete during shutdown
+ */
+ char *call_id_s = make_stop_id(rsc->id, call_id);
+ active_op_t *pending = NULL;
+
+ pending = calloc(1, sizeof(active_op_t));
+ crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
+
+ pending->call_id = call_id;
+ pending->interval_ms = op->interval_ms;
+ pending->op_type = strdup(operation);
+ pending->op_key = strdup(op_id);
+ pending->rsc_id = strdup(rsc->id);
+ pending->start_time = time(NULL);
+ pcmk__str_update(&pending->user_data, op->user_data);
+ if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ &(pending->lock_time)) != pcmk_ok) {
+ pending->lock_time = 0;
+ }
+ g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
+
+ if ((op->interval_ms > 0)
+ && (op->start_delay > START_DELAY_THRESHOLD)) {
+ int target_rc = PCMK_OCF_OK;
+
+ crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
+ decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
+ lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
+ }
+
+ pending->params = op->params;
+ op->params = NULL;
+
+ } else if (lrm_state_is_local(lrm_state)) {
+ crm_err("Could not initiate %s action for resource %s locally: %s "
+ CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
+ fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+
+ } else {
+ crm_err("Could not initiate %s action for resource %s remotely on %s: "
+ "%s " CRM_XS " rc=%d",
+ operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
+ fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
+ PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
+ process_lrm_event(lrm_state, op, NULL, NULL);
+ }
+
+ free(op_id);
+ lrmd_free_event(op);
+}
+
+void
+do_lrm_event(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
+{
+ CRM_CHECK(FALSE, return);
+}
+
+static char *
+unescape_newlines(const char *string)
+{
+ char *pch = NULL;
+ char *ret = NULL;
+ static const char *escaped_newline = "\\n";
+
+ if (!string) {
+ return NULL;
+ }
+
+ ret = strdup(string);
+ pch = strstr(ret, escaped_newline);
+ while (pch != NULL) {
+ /* Replace newline escape pattern with actual newline (and a space so we
+ * don't have to shuffle the rest of the buffer)
+ */
+ pch[0] = '\n';
+ pch[1] = ' ';
+ pch = strstr(pch, escaped_newline);
+ }
+
+ return ret;
+}
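+
+/* For example, unescape_newlines("line1\\nline2") returns "line1\n line2":
+ * each two-character "\n" escape becomes a newline plus a space, so the
+ * buffer length never changes.
+ */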
+
+static bool
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
+ const char * op_type, guint interval_ms)
+{
+ rsc_history_t *entry = NULL;
+
+ CRM_CHECK(lrm_state != NULL, return FALSE);
+ CRM_CHECK(rsc_id != NULL, return FALSE);
+ CRM_CHECK(op_type != NULL, return FALSE);
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ if (entry == NULL || entry->failed == NULL) {
+ return FALSE;
+ }
+
+ if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
+ && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
+ && entry->failed->interval_ms == interval_ms) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an executor action (actual or synthesized)
+ *
+ * \param[in] op Executor action to log result for
+ * \param[in] op_key Operation key for action
+ * \param[in] node_name Name of node action was performed on, if known
+ * \param[in] confirmed Whether to log that graph action was confirmed
+ */
+static void
+log_executor_event(const lrmd_event_data_t *op, const char *op_key,
+ const char *node_name, gboolean confirmed)
+{
+ int log_level = LOG_ERR;
+ GString *str = g_string_sized_new(100); // reasonable starting size
+
+ pcmk__g_strcat(str,
+ "Result of ", crm_action_str(op->op_type, op->interval_ms),
+ " operation for ", op->rsc_id, NULL);
+
+ if (node_name != NULL) {
+ pcmk__g_strcat(str, " on ", node_name, NULL);
+ }
+
+ switch (op->op_status) {
+ case PCMK_EXEC_DONE:
+ log_level = LOG_NOTICE;
+ pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
+ break;
+
+ case PCMK_EXEC_TIMEOUT:
+ pcmk__g_strcat(str,
+ ": ", pcmk_exec_status_str(op->op_status), " after ",
+ pcmk__readable_interval(op->timeout), NULL);
+ break;
+
+ case PCMK_EXEC_CANCELLED:
+ log_level = LOG_INFO;
+ /* The order of the __attribute__ and the "Fall through" comment
+ * is IMPORTANT! Do not change it without proper testing with both
+ * clang and gcc, in multiple versions. The __clang__ check allows
+ * building with all versions of clang. The __has_c_attribute check
+ * works around a bug in the clang version shipped with RHEL 7:
+ * there, __has_attribute would happily return "YES SIR WE GOT IT"
+ * and the build would then fail on the next line.
+ */
+#ifdef __clang__
+#ifdef __has_c_attribute
+#if __has_attribute(fallthrough)
+ __attribute__((fallthrough));
+#endif
+#endif
+#endif
+ // Fall through
+ default:
+ pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
+ NULL);
+ }
+
+ if ((op->exit_reason != NULL)
+ && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
+
+ pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
+ }
+
+ g_string_append(str, " " CRM_XS);
+ g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
+ (confirmed? "" : "un"), op->call_id, op_key);
+ if (op->op_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(str, " rc=%d", op->rc);
+ }
+
+ do_crm_log(log_level, "%s", str->str);
+ g_string_free(str, TRUE);
+
+ /* The services library has already logged the output at info or debug
+ * level, so just raise to notice if it looks like a failure.
+ */
+ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
+ char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
+ op->rsc_id, op->op_type,
+ op->interval_ms, node_name);
+
+ crm_log_output(LOG_NOTICE, prefix, op->output);
+ free(prefix);
+ }
+}
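+
+/* Putting it together, a successful recurring monitor is logged at notice
+ * level roughly as follows (resource, node, and call details hypothetical,
+ * and assuming CRM_XS renders as "|"):
+ *
+ * Result of monitor operation for rsc1 on node1: ok | graph action
+ * confirmed; call=12 key=rsc1_monitor_10000 rc=0
+ */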
+
+void
+process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
+ active_op_t *pending, const xmlNode *action_xml)
+{
+ char *op_id = NULL;
+ char *op_key = NULL;
+
+ gboolean remove = FALSE;
+ gboolean removed = FALSE;
+ bool need_direct_ack = FALSE;
+ lrmd_rsc_info_t *rsc = NULL;
+ const char *node_name = NULL;
+
+ CRM_CHECK(op != NULL, return);
+ CRM_CHECK(op->rsc_id != NULL, return);
+
+ // Remap new status codes for older DCs
+ if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
+ switch (op->op_status) {
+ case PCMK_EXEC_NOT_CONNECTED:
+ lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
+ PCMK_EXEC_ERROR, op->exit_reason);
+ break;
+ case PCMK_EXEC_INVALID:
+ lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
+ op->exit_reason);
+ break;
+ default:
+ break;
+ }
+ }
+
+ op_id = make_stop_id(op->rsc_id, op->call_id);
+ op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
+
+ // Get resource info if available (from executor state or action XML)
+ if (lrm_state) {
+ rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
+ }
+ if ((rsc == NULL) && action_xml) {
+ xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
+
+ const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
+ const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
+ const char *type = crm_element_value(xml, XML_ATTR_TYPE);
+
+ if (standard && type) {
+ crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
+ op->rsc_id, standard,
+ (provider? ":" : ""), (provider? provider : ""), type);
+ rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
+ } else {
+ crm_err("Can't process %s result because %s agent information not cached or in XML",
+ op_key, op->rsc_id);
+ }
+ }
+
+ // Get node name if available (from executor state or action XML)
+ if (lrm_state) {
+ node_name = lrm_state->node_name;
+ } else if (action_xml) {
+ node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
+ }
+
+    if (pending == NULL) {
+ remove = TRUE;
+ if (lrm_state) {
+ pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
+ }
+ }
+
+ if (op->op_status == PCMK_EXEC_ERROR) {
+ switch(op->rc) {
+ case PCMK_OCF_NOT_RUNNING:
+ case PCMK_OCF_RUNNING_PROMOTED:
+ case PCMK_OCF_DEGRADED:
+ case PCMK_OCF_DEGRADED_PROMOTED:
+ // Leave it to the TE/scheduler to decide if this is an error
+ op->op_status = PCMK_EXEC_DONE;
+ break;
+ default:
+ /* Nothing to do */
+ break;
+ }
+ }
+
+ if (op->op_status != PCMK_EXEC_CANCELLED) {
+ /* We might not record the result, so directly acknowledge it to the
+ * originator instead, so it doesn't time out waiting for the result
+ * (especially important if part of a transition).
+ */
+ need_direct_ack = TRUE;
+
+ if (controld_action_is_recordable(op->op_type)) {
+ if (node_name && rsc) {
+ // We should record the result, and happily, we can
+ time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
+
+ controld_update_resource_history(node_name, rsc, op, lock_time);
+ need_direct_ack = FALSE;
+
+ } else if (op->rsc_deleted) {
+ /* We shouldn't record the result (likely the resource was
+ * refreshed, cleaned, or removed while this operation was
+ * in flight).
+ */
+ crm_notice("Not recording %s result in CIB because "
+ "resource information was removed since it was initiated",
+ op_key);
+ } else {
+ /* This shouldn't be possible; the executor didn't consider the
+ * resource deleted, but we couldn't find resource or node
+ * information.
+ */
+ crm_err("Unable to record %s result in CIB: %s", op_key,
+ (node_name? "No resource information" : "No node name"));
+ }
+ }
+
+ } else if (op->interval_ms == 0) {
+ /* A non-recurring operation was cancelled. Most likely, the
+ * never-initiated action was removed from the executor's pending
+ * operations list upon resource removal.
+ */
+ need_direct_ack = TRUE;
+
+ } else if (pending == NULL) {
+ /* This recurring operation was cancelled, but was not pending. No
+ * transition actions are waiting on it, nothing needs to be done.
+ */
+
+ } else if (op->user_data == NULL) {
+ /* This recurring operation was cancelled and pending, but we don't
+ * have a transition key. This should never happen.
+ */
+ crm_err("Recurring operation %s was cancelled without transition information",
+ op_key);
+
+ } else if (pcmk_is_set(pending->flags, active_op_remove)) {
+ /* This recurring operation was cancelled (by us) and pending, and we
+ * have been waiting for it to finish.
+ */
+ if (lrm_state) {
+ controld_delete_action_history(op);
+ }
+
+ /* Directly acknowledge failed recurring actions here. The above call to
+ * controld_delete_action_history() will not erase any corresponding
+ * last_failure entry, which means that the DC won't confirm the
+ * cancellation via process_op_deletion(), and the transition would
+ * otherwise wait for the action timer to pop.
+ */
+ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
+ pending->op_type, pending->interval_ms)) {
+ need_direct_ack = TRUE;
+ }
+
+ } else if (op->rsc_deleted) {
+ /* This recurring operation was cancelled (but not by us, and the
+ * executor does not have resource information, likely due to resource
+ * cleanup, refresh, or removal) and pending.
+ */
+ crm_debug("Recurring op %s was cancelled due to resource deletion",
+ op_key);
+ need_direct_ack = TRUE;
+
+ } else {
+ /* This recurring operation was cancelled (but not by us, likely by the
+ * executor before stopping the resource) and pending. We don't need to
+ * do anything special.
+ */
+ }
+
+ if (need_direct_ack) {
+ controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
+ }
+
+    if (remove == FALSE) {
+ /* The caller will do this afterwards, but keep the logging consistent */
+ removed = TRUE;
+
+ } else if (lrm_state && ((op->interval_ms == 0)
+ || (op->op_status == PCMK_EXEC_CANCELLED))) {
+
+ gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
+
+ if (op->interval_ms != 0) {
+ removed = TRUE;
+ } else if (found) {
+ removed = TRUE;
+ crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
+ op_key, op->call_id, op_id,
+ g_hash_table_size(lrm_state->active_ops));
+ }
+ }
+
+ log_executor_event(op, op_key, node_name, removed);
+
+ if (lrm_state) {
+ if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) {
+ crmd_alert_resource_op(lrm_state->node_name, op);
+ } else if (rsc && (op->rc == PCMK_OCF_OK)) {
+ char *metadata = unescape_newlines(op->output);
+
+ controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
+ free(metadata);
+ }
+ }
+
+ if (op->rsc_deleted) {
+ crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
+ if (lrm_state) {
+ delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
+ true);
+ }
+ }
+
+ /* If a shutdown was escalated while operations were pending,
+ * then the FSA will be stalled right now... allow it to continue
+ */
+ controld_trigger_fsa();
+ if (lrm_state && rsc) {
+ update_history_cache(lrm_state, rsc, op);
+ }
+
+ lrmd_free_rsc_info(rsc);
+ free(op_key);
+ free(op_id);
+}
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
new file mode 100644
index 0000000..8c68bfc
--- /dev/null
+++ b/daemons/controld/controld_execd_state.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/iso8601.h>
+#include <crm/pengine/rules.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+static GHashTable *lrm_state_table = NULL;
+extern GHashTable *proxy_table;
+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
+
+static void
+free_rsc_info(gpointer value)
+{
+ lrmd_rsc_info_t *rsc_info = value;
+
+ lrmd_free_rsc_info(rsc_info);
+}
+
+static void
+free_deletion_op(gpointer value)
+{
+ struct pending_deletion_op_s *op = value;
+
+ free(op->rsc);
+ delete_ha_msg_input(op->input);
+ free(op);
+}
+
+static void
+free_recurring_op(gpointer value)
+{
+ active_op_t *op = value;
+
+ free(op->user_data);
+ free(op->rsc_id);
+ free(op->op_type);
+ free(op->op_key);
+ if (op->params) {
+ g_hash_table_destroy(op->params);
+ }
+ free(op);
+}
+
+static gboolean
+fail_pending_op(gpointer key, gpointer value, gpointer user_data)
+{
+ lrmd_event_data_t event = { 0, };
+ lrm_state_t *lrm_state = user_data;
+ active_op_t *op = value;
+
+ crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)",
+ op->rsc_id, op->op_type, op->interval_ms,
+ lrm_state->node_name, (char*)key, op->user_data);
+
+ event.type = lrmd_event_exec_complete;
+ event.rsc_id = op->rsc_id;
+ event.op_type = op->op_type;
+ event.user_data = op->user_data;
+ event.timeout = 0;
+ event.interval_ms = op->interval_ms;
+ lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Action was pending when executor connection was dropped");
+ event.t_run = (unsigned int) op->start_time;
+ event.t_rcchange = (unsigned int) op->start_time;
+
+ event.call_id = op->call_id;
+ event.remote_nodename = lrm_state->node_name;
+ event.params = op->params;
+
+ process_lrm_event(lrm_state, &event, op, NULL);
+ lrmd__reset_result(&event);
+ return TRUE;
+}
+
+gboolean
+lrm_state_is_local(lrm_state_t *lrm_state)
+{
+ return (lrm_state != NULL)
+ && pcmk__str_eq(lrm_state->node_name, controld_globals.our_nodename,
+ pcmk__str_casei);
+}
+
+/*!
+ * \internal
+ * \brief Create executor state entry for a node and add it to the state table
+ *
+ * \param[in] node_name Node to create entry for
+ *
+ * \return Newly allocated executor state object initialized for \p node_name
+ */
+static lrm_state_t *
+lrm_state_create(const char *node_name)
+{
+ lrm_state_t *state = NULL;
+
+ if (!node_name) {
+ crm_err("No node name given for lrm state object");
+ return NULL;
+ }
+
+ state = calloc(1, sizeof(lrm_state_t));
+ if (!state) {
+ return NULL;
+ }
+
+ state->node_name = strdup(node_name);
+ state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
+ state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
+ state->active_ops = pcmk__strkey_table(free, free_recurring_op);
+ state->resource_history = pcmk__strkey_table(NULL, history_free);
+ state->metadata_cache = metadata_cache_new();
+
+ g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
+ return state;
+}
+
+void
+lrm_state_destroy(const char *node_name)
+{
+ g_hash_table_remove(lrm_state_table, node_name);
+}
+
+static gboolean
+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
+{
+ remote_proxy_t *proxy = value;
+ const char *node_name = user_data;
+
+ if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static remote_proxy_t *
+find_connected_proxy_by_node(const char * node_name)
+{
+ GHashTableIter gIter;
+ remote_proxy_t *proxy = NULL;
+
+ CRM_CHECK(proxy_table != NULL, return NULL);
+
+ g_hash_table_iter_init(&gIter, proxy_table);
+
+ while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
+ if (proxy->source
+ && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+ return proxy;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+remote_proxy_disconnect_by_node(const char * node_name)
+{
+ remote_proxy_t *proxy = NULL;
+
+ CRM_CHECK(proxy_table != NULL, return);
+
+ while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
+        /* mainloop_del_ipc_client() eventually calls
+         * remote_proxy_disconnected(), which removes the entry from
+         * proxy_table. Do not do this inside a g_hash_table_iter_next() loop.
+         */
+ if (proxy->source) {
+ mainloop_del_ipc_client(proxy->source);
+ }
+ }
+
+ return;
+}
+
+static void
+internal_lrm_state_destroy(gpointer data)
+{
+ lrm_state_t *lrm_state = data;
+
+ if (!lrm_state) {
+ return;
+ }
+
+    /* Rather than removing the recorded proxy entries from proxy_table
+     * directly, make sure any connected proxies get disconnected. That way,
+     * remote_proxy_disconnected() will be called and will remove the entries
+     * from proxy_table as well.
+     */
+ remote_proxy_disconnect_by_node(lrm_state->node_name);
+
+ crm_trace("Destroying proxy table %s with %u members",
+ lrm_state->node_name, g_hash_table_size(proxy_table));
+ // Just in case there's still any leftovers in proxy_table
+ g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
+ remote_ra_cleanup(lrm_state);
+ lrmd_api_delete(lrm_state->conn);
+
+ if (lrm_state->rsc_info_cache) {
+ crm_trace("Destroying rsc info cache with %u members",
+ g_hash_table_size(lrm_state->rsc_info_cache));
+ g_hash_table_destroy(lrm_state->rsc_info_cache);
+ }
+ if (lrm_state->resource_history) {
+ crm_trace("Destroying history op cache with %u members",
+ g_hash_table_size(lrm_state->resource_history));
+ g_hash_table_destroy(lrm_state->resource_history);
+ }
+ if (lrm_state->deletion_ops) {
+ crm_trace("Destroying deletion op cache with %u members",
+ g_hash_table_size(lrm_state->deletion_ops));
+ g_hash_table_destroy(lrm_state->deletion_ops);
+ }
+ if (lrm_state->active_ops != NULL) {
+ crm_trace("Destroying pending op cache with %u members",
+ g_hash_table_size(lrm_state->active_ops));
+ g_hash_table_destroy(lrm_state->active_ops);
+ }
+ metadata_cache_free(lrm_state->metadata_cache);
+
+ free((char *)lrm_state->node_name);
+ free(lrm_state);
+}
+
+void
+lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
+{
+ if (lrm_state->resource_history) {
+ crm_trace("Resetting resource history cache with %u members",
+ g_hash_table_size(lrm_state->resource_history));
+ g_hash_table_remove_all(lrm_state->resource_history);
+ }
+ if (lrm_state->deletion_ops) {
+ crm_trace("Resetting deletion operations cache with %u members",
+ g_hash_table_size(lrm_state->deletion_ops));
+ g_hash_table_remove_all(lrm_state->deletion_ops);
+ }
+ if (lrm_state->active_ops != NULL) {
+ crm_trace("Resetting active operations cache with %u members",
+ g_hash_table_size(lrm_state->active_ops));
+ g_hash_table_remove_all(lrm_state->active_ops);
+ }
+ if (lrm_state->rsc_info_cache) {
+ crm_trace("Resetting resource information cache with %u members",
+ g_hash_table_size(lrm_state->rsc_info_cache));
+ g_hash_table_remove_all(lrm_state->rsc_info_cache);
+ }
+ if (reset_metadata) {
+ metadata_cache_reset(lrm_state->metadata_cache);
+ }
+}
+
+gboolean
+lrm_state_init_local(void)
+{
+ if (lrm_state_table) {
+ return TRUE;
+ }
+
+ lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy);
+ if (!lrm_state_table) {
+ return FALSE;
+ }
+
+ proxy_table = pcmk__strikey_table(NULL, remote_proxy_free);
+ if (!proxy_table) {
+ g_hash_table_destroy(lrm_state_table);
+ lrm_state_table = NULL;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void
+lrm_state_destroy_all(void)
+{
+ if (lrm_state_table) {
+ crm_trace("Destroying state table with %u members",
+ g_hash_table_size(lrm_state_table));
+        g_hash_table_destroy(lrm_state_table);
+        lrm_state_table = NULL;
+    }
+    if (proxy_table) {
+        crm_trace("Destroying proxy table with %u members",
+                  g_hash_table_size(proxy_table));
+        g_hash_table_destroy(proxy_table);
+        proxy_table = NULL;
+    }
+}
+
+lrm_state_t *
+lrm_state_find(const char *node_name)
+{
+ if (!node_name) {
+ return NULL;
+ }
+ return g_hash_table_lookup(lrm_state_table, node_name);
+}
+
+lrm_state_t *
+lrm_state_find_or_create(const char *node_name)
+{
+ lrm_state_t *lrm_state;
+
+ lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
+ if (!lrm_state) {
+ lrm_state = lrm_state_create(node_name);
+ }
+
+ return lrm_state;
+}
+
+GList *
+lrm_state_get_list(void)
+{
+ return g_hash_table_get_values(lrm_state_table);
+}
+
+void
+lrm_state_disconnect_only(lrm_state_t * lrm_state)
+{
+ int removed = 0;
+
+ if (!lrm_state->conn) {
+ return;
+ }
+ crm_trace("Disconnecting %s", lrm_state->node_name);
+
+ remote_proxy_disconnect_by_node(lrm_state->node_name);
+
+ ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ removed = g_hash_table_foreach_remove(lrm_state->active_ops,
+ fail_pending_op, lrm_state);
+ crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
+ }
+}
+
+void
+lrm_state_disconnect(lrm_state_t * lrm_state)
+{
+ if (!lrm_state->conn) {
+ return;
+ }
+
+ lrm_state_disconnect_only(lrm_state);
+
+ lrmd_api_delete(lrm_state->conn);
+ lrm_state->conn = NULL;
+}
+
+int
+lrm_state_is_connected(lrm_state_t * lrm_state)
+{
+ if (!lrm_state->conn) {
+ return FALSE;
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
+}
+
+int
+lrm_state_poke_connection(lrm_state_t * lrm_state)
+{
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
+}
+
+// \return Standard Pacemaker return code
+int
+controld_connect_local_executor(lrm_state_t *lrm_state)
+{
+ int rc = pcmk_rc_ok;
+
+ if (lrm_state->conn == NULL) {
+ lrmd_t *api = NULL;
+
+ rc = lrmd__new(&api, NULL, NULL, 0);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+ api->cmds->set_callback(api, lrm_op_callback);
+ lrm_state->conn = api;
+ }
+
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn,
+ CRM_SYSTEM_CRMD, NULL);
+ rc = pcmk_legacy2rc(rc);
+
+ if (rc == pcmk_rc_ok) {
+ lrm_state->num_lrm_register_fails = 0;
+ } else {
+ lrm_state->num_lrm_register_fails++;
+ }
+ return rc;
+}
+
+static remote_proxy_t *
+crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel)
+{
+ struct ipc_client_callbacks proxy_callbacks = {
+ .dispatch = remote_proxy_dispatch,
+ .destroy = remote_proxy_disconnected
+ };
+ remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name,
+ session_id, channel);
+ return proxy;
+}
+
+gboolean
+crmd_is_proxy_session(const char *session)
+{
+ return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
+}
+
+void
+crmd_proxy_send(const char *session, xmlNode *msg)
+{
+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
+ lrm_state_t *lrm_state = NULL;
+
+ if (!proxy) {
+ return;
+ }
+ crm_log_xml_trace(msg, "to-proxy");
+ lrm_state = lrm_state_find(proxy->node_name);
+ if (lrm_state) {
+ crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
+ remote_proxy_relay_event(proxy, msg);
+ }
+}
+
+static void
+crmd_proxy_dispatch(const char *session, xmlNode *msg)
+{
+ crm_trace("Processing proxied IPC message from session %s", session);
+ crm_log_xml_trace(msg, "controller[inbound]");
+ crm_xml_add(msg, F_CRM_SYS_FROM, session);
+ if (controld_authorize_ipc_message(msg, NULL, session)) {
+ route_message(C_IPC_MESSAGE, msg);
+ }
+ controld_trigger_fsa();
+}
+
+static void
+remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc != pcmk_ok) {
+ crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
+
+ if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
+ crm_err("The cluster is mis-configured - shutting down and staying down");
+ }
+
+ } else {
+ lrmd_t * lrmd = (lrmd_t *)user_data;
+ crm_time_t *now = crm_time_new(NULL);
+ GHashTable *config_hash = pcmk__strkey_table(free, free);
+
+ crm_debug("Call %d : Parsing CIB options", call_id);
+
+ pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL,
+ config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
+
+ /* Now send it to the remote peer */
+ lrmd__validate_remote_settings(lrmd, config_hash);
+
+ g_hash_table_destroy(config_hash);
+ crm_time_free(now);
+ }
+}
+
+static void
+crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
+{
+ lrm_state_t *lrm_state = userdata;
+ const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
+ remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
+
+ const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
+ if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) {
+ const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
+
+ proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel);
+ if (!remote_ra_controlling_guest(lrm_state)) {
+ if (proxy != NULL) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ /* Look up stonith-watchdog-timeout and send to the remote peer for validation */
+ int rc = cib_conn->cmds->query(cib_conn, XML_CIB_TAG_CRMCONFIG,
+ NULL, cib_scope_local);
+ cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE,
+ lrmd,
+ "remote_config_check",
+ remote_config_check,
+ NULL);
+ }
+ } else {
+ crm_debug("Skipping remote_config_check for guest-nodes");
+ }
+
+ } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) {
+ char *now_s = NULL;
+
+ crm_notice("%s requested shutdown of its remote connection",
+ lrm_state->node_name);
+
+ if (!remote_ra_is_in_maintenance(lrm_state)) {
+ now_s = pcmk__ttoa(time(NULL));
+ update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
+ free(now_s);
+
+ remote_proxy_ack_shutdown(lrmd);
+
+ crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
+ lrm_state->node_name);
+ } else {
+ remote_proxy_nack_shutdown(lrmd);
+
+ crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
+ lrm_state->node_name);
+ }
+ return;
+
+ } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) {
+ /* This is for the controller, which we are, so don't try
+ * to send to ourselves over IPC -- do it directly.
+ */
+ int flags = 0;
+ xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
+
+ CRM_CHECK(request != NULL, return);
+ CRM_CHECK(lrm_state->node_name, return);
+ crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
+ pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
+
+ /* Pacemaker Remote nodes don't know their own names (as known to the
+ * cluster). When getting a node info request with no name or ID, add
+ * the name, so we don't return info for ourselves instead of the
+ * Pacemaker Remote node.
+ */
+ if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_casei)) {
+ int node_id = 0;
+
+ crm_element_value_int(request, XML_ATTR_ID, &node_id);
+ if ((node_id <= 0)
+ && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) {
+ crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name);
+ }
+ }
+
+ crmd_proxy_dispatch(session, request);
+
+ crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
+ if (flags & crm_ipc_client_response) {
+ int msg_id = 0;
+ xmlNode *op_reply = create_xml_node(NULL, "ack");
+
+ crm_xml_add(op_reply, "function", __func__);
+ crm_xml_add_int(op_reply, "line", __LINE__);
+
+ crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
+ remote_proxy_relay_response(proxy, op_reply, msg_id);
+
+ free_xml(op_reply);
+ }
+
+ } else {
+ remote_proxy_cb(lrmd, lrm_state->node_name, msg);
+ }
+}
+
+
+// \return Standard Pacemaker return code
+int
+controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
+ int port, int timeout_ms)
+{
+ int rc = pcmk_rc_ok;
+
+ if (lrm_state->conn == NULL) {
+ lrmd_t *api = NULL;
+
+ rc = lrmd__new(&api, lrm_state->node_name, server, port);
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Pacemaker Remote connection to %s:%s failed: %s "
+ CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc);
+
+ return rc;
+ }
+ lrm_state->conn = api;
+ api->cmds->set_callback(api, remote_lrm_op_callback);
+ lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb);
+ }
+
+ crm_trace("Initiating remote connection to %s:%d with timeout %dms",
+ server, port, timeout_ms);
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn,
+ lrm_state->node_name,
+ timeout_ms);
+ if (rc == pcmk_ok) {
+ lrm_state->num_lrm_register_fails = 0;
+ } else {
+ lrm_state->num_lrm_register_fails++; // Ignored for remote connections
+ }
+ return pcmk_legacy2rc(rc);
+}
+
+int
+lrm_state_get_metadata(lrm_state_t * lrm_state,
+ const char *class,
+ const char *provider,
+ const char *agent, char **output, enum lrmd_call_options options)
+{
+ lrmd_key_value_t *params = NULL;
+
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+ /* Add the node name to the environment, as is done with normal resource
+ * action calls. Meta-data calls shouldn't need it, but some agents are
+ * written with an ocf_local_nodename call at the beginning regardless of
+ * action. Without the environment variable, the agent would try to contact
+ * the controller to get the node name -- but the controller would be
+ * blocking on the synchronous meta-data call.
+ *
+ * At this point, we have to assume that agents are unlikely to make other
+ * calls that require the controller, such as crm_node --quorum or
+ * --cluster-id.
+ *
+ * @TODO Make meta-data calls asynchronous. (This will be part of a larger
+ * project to make meta-data calls via the executor rather than directly.)
+ */
+ params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET,
+ lrm_state->node_name);
+
+ return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn,
+ class, provider, agent, output, options, params);
+}
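+
+/* Hypothetical usage sketch (illustration only; the agent names are made up):
+ *
+ *   char *metadata = NULL;
+ *
+ *   if (lrm_state_get_metadata(lrm_state, "ocf", "heartbeat", "Dummy",
+ *                              &metadata, 0) == pcmk_ok) {
+ *       // parse the agent's meta-data XML, then free it
+ *       free(metadata);
+ *   }
+ */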
+
+int
+lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
+ guint interval_ms)
+{
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+    /* @TODO Figure out a way to make this asynchronous. Currently it is
+     * synchronous and directly acknowledged in do_lrm_invoke().
+     */
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
+ }
+ return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id,
+ action, interval_ms);
+}
+
+lrmd_rsc_info_t *
+lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
+{
+ lrmd_rsc_info_t *rsc = NULL;
+
+ if (!lrm_state->conn) {
+ return NULL;
+ }
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ return remote_ra_get_rsc_info(lrm_state, rsc_id);
+ }
+
+ rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
+ if (rsc == NULL) {
+ /* only contact the lrmd if we don't already have a cached rsc info */
+ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
+ if (rsc == NULL) {
+ return NULL;
+ }
+ /* cache the result */
+ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
+ }
+
+ return lrmd_copy_rsc_info(rsc);
+
+}
+
+/*!
+ * \internal
+ * \brief Initiate a resource agent action
+ *
+ * \param[in,out] lrm_state Executor state object
+ * \param[in] rsc_id ID of resource for action
+ * \param[in] action Action to execute
+ * \param[in] userdata String to copy and pass to execution callback
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in] timeout_ms Action timeout (in milliseconds)
+ * \param[in] start_delay_ms Delay (in ms) before initiating action
+ * \param[in] parameters Hash table of resource parameters
+ * \param[out] call_id Where to store call ID on success
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, GHashTable *parameters,
+ int *call_id)
+{
+ int rc = pcmk_rc_ok;
+ lrmd_key_value_t *params = NULL;
+
+ if (lrm_state->conn == NULL) {
+ return ENOTCONN;
+ }
+
+ // Convert parameters from hash table to list
+ if (parameters != NULL) {
+ const char *key = NULL;
+ const char *value = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, parameters);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ params = lrmd_key_value_add(params, key, value);
+ }
+ }
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ rc = controld_execute_remote_agent(lrm_state, rsc_id, action,
+ userdata, interval_ms, timeout_ms,
+ start_delay_ms, params, call_id);
+
+ } else {
+ rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id,
+ action, userdata,
+ interval_ms, timeout_ms,
+ start_delay_ms,
+ lrmd_opt_notify_changes_only,
+ params);
+ if (rc < 0) {
+ rc = pcmk_legacy2rc(rc);
+ } else {
+ *call_id = rc;
+ rc = pcmk_rc_ok;
+ }
+ }
+ return rc;
+}
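+
+/* Hypothetical caller sketch (illustration only; "myrsc", the timeouts, and
+ * transition_key are made-up values): parameters arrive as a hash table and
+ * are converted to an lrmd_key_value_t list internally; on success the
+ * executor call ID is returned via call_id.
+ *
+ *   int call_id = 0;
+ *   GHashTable *params = pcmk__strkey_table(free, free);
+ *
+ *   g_hash_table_insert(params, strdup("fake"), strdup("value"));
+ *   rc = controld_execute_resource_agent(lrm_state, "myrsc", "start",
+ *                                        transition_key, 0, 20000, 0,
+ *                                        params, &call_id);
+ */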
+
+int
+lrm_state_register_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id,
+ const char *class,
+ const char *provider, const char *agent, enum lrmd_call_options options)
+{
+ lrmd_t *conn = (lrmd_t *) lrm_state->conn;
+
+ if (conn == NULL) {
+ return -ENOTCONN;
+ }
+
+ if (is_remote_lrmd_ra(agent, provider, NULL)) {
+ return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
+ }
+
+ /* @TODO Implement an asynchronous version of this (currently a blocking
+ * call to the lrmd).
+ */
+ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
+ agent, options);
+}
+
+int
+lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options)
+{
+ if (!lrm_state->conn) {
+ return -ENOTCONN;
+ }
+
+ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+ lrm_state_destroy(rsc_id);
+ return pcmk_ok;
+ }
+
+ g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
+
+ /* @TODO Optimize this ... this function is a blocking round trip from
+ * client to daemon. The controld_execd_state.c code path that uses this
+ * function should always treat it as an async operation. The executor API
+ * should make an async version available.
+ */
+ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
+}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
new file mode 100644
index 0000000..89cb61f
--- /dev/null
+++ b/daemons/controld/controld_fencing.c
@@ -0,0 +1,1108 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+
+#include <pacemaker-controld.h>
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
+
+/*
+ * stonith failure counting
+ *
+ * We don't want to get stuck in a permanent fencing loop. Keep track of the
+ * number of fencing failures for each target node, and the most we'll restart a
+ * transition for.
+ */
+
+struct st_fail_rec {
+ int count;
+};
+
+static bool fence_reaction_panic = false;
+static unsigned long int stonith_max_attempts = 10;
+static GHashTable *stonith_failures = NULL;
+
+/*!
+ * \internal
+ * \brief Update max fencing attempts before giving up
+ *
+ * \param[in] value New max fencing attempts
+ */
+static void
+update_stonith_max_attempts(const char *value)
+{
+    int score = char2score(value);
+
+    // char2score() can return a negative value, which would wrap if
+    // assigned directly to the unsigned stonith_max_attempts
+    stonith_max_attempts = (score < 1)? 10UL : (unsigned long int) score;
+}
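+
+/* For illustration: char2score() maps score strings to integers (e.g. "3" to
+ * 3, "INFINITY" to 1000000), so stonith-max-attempts=INFINITY effectively
+ * disables the give-up threshold.
+ */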
+
+/*!
+ * \internal
+ * \brief Configure reaction to notification of local node being fenced
+ *
+ * \param[in] reaction_s Reaction type
+ */
+static void
+set_fence_reaction(const char *reaction_s)
+{
+ if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
+ fence_reaction_panic = true;
+
+ } else {
+ if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
+ crm_warn("Invalid value '%s' for %s, using 'stop'",
+ reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
+ }
+ fence_reaction_panic = false;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Configure fencing options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_fencing(GHashTable *options)
+{
+ const char *value = NULL;
+
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
+ set_fence_reaction(value);
+
+ value = g_hash_table_lookup(options, "stonith-max-attempts");
+ update_stonith_max_attempts(value);
+}
+
+static gboolean
+too_many_st_failures(const char *target)
+{
+ GHashTableIter iter;
+ const char *key = NULL;
+ struct st_fail_rec *value = NULL;
+
+ if (stonith_failures == NULL) {
+ return FALSE;
+ }
+
+ if (target == NULL) {
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+
+ if (value->count >= stonith_max_attempts) {
+ target = (const char*)key;
+ goto too_many;
+ }
+ }
+ } else {
+ value = g_hash_table_lookup(stonith_failures, target);
+ if ((value != NULL) && (value->count >= stonith_max_attempts)) {
+ goto too_many;
+ }
+ }
+ return FALSE;
+
+too_many:
+ crm_warn("Too many failures (%d) to fence %s, giving up",
+ value->count, target);
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Reset a stonith fail count
+ *
+ * \param[in] target Name of node to reset, or NULL for all
+ */
+void
+st_fail_count_reset(const char *target)
+{
+ if (stonith_failures == NULL) {
+ return;
+ }
+
+ if (target) {
+ struct st_fail_rec *rec = NULL;
+
+ rec = g_hash_table_lookup(stonith_failures, target);
+ if (rec) {
+ rec->count = 0;
+ }
+ } else {
+ GHashTableIter iter;
+ const char *key = NULL;
+ struct st_fail_rec *rec = NULL;
+
+ g_hash_table_iter_init(&iter, stonith_failures);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &rec)) {
+ rec->count = 0;
+ }
+ }
+}
+
+static void
+st_fail_count_increment(const char *target)
+{
+ struct st_fail_rec *rec = NULL;
+
+ if (stonith_failures == NULL) {
+ stonith_failures = pcmk__strkey_table(free, free);
+ }
+
+ rec = g_hash_table_lookup(stonith_failures, target);
+ if (rec) {
+ rec->count++;
+ } else {
+ rec = malloc(sizeof(struct st_fail_rec));
+        if (rec == NULL) {
+ return;
+ }
+
+ rec->count = 1;
+ g_hash_table_insert(stonith_failures, strdup(target), rec);
+ }
+}
+
+/* end stonith fail count functions */
+
+
+static void
+cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ if (rc < pcmk_ok) {
+ crm_err("Fencing update %d for %s: failed - %s (%d)",
+ call_id, (char *)user_data, pcmk_strerror(rc), rc);
+ crm_log_xml_warn(msg, "Failed update");
+ abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
+ NULL);
+
+ } else {
+ crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
+ }
+}
+
+static void
+send_stonith_update(pcmk__graph_action_t *action, const char *target,
+ const char *uuid)
+{
+ int rc = pcmk_ok;
+ crm_node_t *peer = NULL;
+
+ /* We (usually) rely on the membership layer to do node_update_cluster,
+ * and the peer status callback to do node_update_peer, because the node
+ * might have already rejoined before we get the stonith result here.
+ */
+ int flags = node_update_join | node_update_expected;
+
+ /* zero out the node-status & remove all LRM status info */
+ xmlNode *node_state = NULL;
+
+ CRM_CHECK(target != NULL, return);
+ CRM_CHECK(uuid != NULL, return);
+
+ /* Make sure the membership and join caches are accurate */
+ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
+
+ CRM_CHECK(peer != NULL, return);
+
+ if (peer->state == NULL) {
+ /* Usually, we rely on the membership layer to update the cluster state
+ * in the CIB. However, if the node has never been seen, do it here, so
+ * the node is not considered unclean.
+ */
+ flags |= node_update_cluster;
+ }
+
+ if (peer->uuid == NULL) {
+ crm_info("Recording uuid '%s' for node '%s'", uuid, target);
+ peer->uuid = strdup(uuid);
+ }
+
+ crmd_peer_down(peer, TRUE);
+
+ /* Generate a node state update for the CIB */
+ node_state = create_node_state_update(peer, flags, NULL, __func__);
+
+ /* we have to mark whether or not remote nodes have already been fenced */
+ if (peer->flags & crm_remote_node) {
+ char *now_s = pcmk__ttoa(time(NULL));
+
+ crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
+ free(now_s);
+ }
+
+ /* Force our known ID */
+ crm_xml_add(node_state, XML_ATTR_ID, uuid);
+
+ rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+ XML_CIB_TAG_STATUS, node_state,
+ cib_scope_local
+ |cib_can_create);
+
+ /* Delay processing the trigger until the update completes */
+ crm_debug("Sending fencing update %d for %s", rc, target);
+ fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
+
+ // Make sure it sticks
+ /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
+ * cib_scope_local);
+ */
+
+ controld_delete_node_state(peer->uname, controld_section_all,
+ cib_scope_local);
+ free_xml(node_state);
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Abort transition due to stonith failure
+ *
+ * \param[in] abort_action Whether to restart or stop transition
+ * \param[in] target Don't restart if this (NULL for any) has too many failures
+ * \param[in] reason Log this stonith action XML as abort reason (or NULL)
+ */
+static void
+abort_for_stonith_failure(enum pcmk__graph_next abort_action,
+ const char *target, const xmlNode *reason)
+{
+ /* If stonith repeatedly fails, we eventually give up on starting a new
+ * transition for that reason.
+ */
+ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
+ abort_action = pcmk__graph_wait;
+ }
+ abort_transition(INFINITY, abort_action, "Stonith failed", reason);
+}
+
+
+/*
+ * stonith cleanup list
+ *
+ * If the DC is shot, proper notifications might not go out.
+ * The stonith cleanup list allows the cluster to (re-)send
+ * notifications once a new DC is elected.
+ */
+
+static GList *stonith_cleanup_list = NULL;
+
+/*!
+ * \internal
+ * \brief Add a node to the stonith cleanup list
+ *
+ * \param[in] target Name of node to add
+ */
+void
+add_stonith_cleanup(const char *target)
+{
+ stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
+}
+
+/*!
+ * \internal
+ * \brief Remove a node from the stonith cleanup list
+ *
+ * \param[in] target Name of node to remove
+ */
+void
+remove_stonith_cleanup(const char *target)
+{
+ GList *iter = stonith_cleanup_list;
+
+ while (iter != NULL) {
+ GList *tmp = iter;
+ char *iter_name = tmp->data;
+
+ iter = iter->next;
+ if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
+ crm_trace("Removing %s from the cleanup list", iter_name);
+ stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
+ free(iter_name);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Purge all entries from the stonith cleanup list
+ */
+void
+purge_stonith_cleanup(void)
+{
+ if (stonith_cleanup_list) {
+ GList *iter = NULL;
+
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+ char *target = iter->data;
+
+ crm_info("Purging %s from stonith cleanup list", target);
+ free(target);
+ }
+ g_list_free(stonith_cleanup_list);
+ stonith_cleanup_list = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Send stonith updates for all entries in cleanup list, then purge it
+ */
+void
+execute_stonith_cleanup(void)
+{
+ GList *iter;
+
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+ char *target = iter->data;
+ crm_node_t *target_node = crm_get_peer(0, target);
+ const char *uuid = crm_peer_uuid(target_node);
+
+ crm_notice("Marking %s, target of a previous stonith action, as clean", target);
+ send_stonith_update(NULL, target, uuid);
+ free(target);
+ }
+ g_list_free(stonith_cleanup_list);
+ stonith_cleanup_list = NULL;
+}
+
+/* end stonith cleanup list functions */
+
+
+/* stonith API client
+ *
+ * Functions that need to interact directly with the fencer via its API
+ */
+
+static stonith_t *stonith_api = NULL;
+static crm_trigger_t *stonith_reconnect = NULL;
+static char *te_client_id = NULL;
+
+static gboolean
+fail_incompletable_stonith(pcmk__graph_t *graph)
+{
+ GList *lpc = NULL;
+ const char *task = NULL;
+ xmlNode *last_action = NULL;
+
+ if (graph == NULL) {
+ return FALSE;
+ }
+
+ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ GList *lpc2 = NULL;
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+ continue;
+ }
+
+ for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
+
+ if ((action->type != pcmk__cluster_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ continue;
+ }
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ last_action = action->xml;
+ pcmk__update_graph(graph, action);
+ crm_notice("Failing action %d (%s): fencer terminated",
+ action->id, ID(action->xml));
+ }
+ }
+ }
+
+ if (last_action != NULL) {
+ crm_warn("Fencer failure resulted in unrunnable actions");
+ abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
+{
+ te_cleanup_stonith_history_sync(st, FALSE);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
+ crm_crit("Fencing daemon connection failed");
+ mainloop_set_trigger(stonith_reconnect);
+
+ } else {
+ crm_info("Fencing daemon disconnected");
+ }
+
+ if (stonith_api) {
+ /* the client API won't properly reconnect notifications
+ * if they are still in the table - so remove them
+ */
+ if (stonith_api->state != stonith_disconnected) {
+ stonith_api->cmds->disconnect(st);
+ }
+ stonith_api->cmds->remove_notification(stonith_api, NULL);
+ }
+
+ if (AM_I_DC) {
+ fail_incompletable_stonith(controld_globals.transition_graph);
+ trigger_graph();
+ }
+}
+
+/*!
+ * \internal
+ * \brief Handle an event notification from the fencing API
+ *
+ * \param[in] st Fencing API connection (ignored)
+ * \param[in] event Fencing API event notification
+ */
+static void
+handle_fence_notification(stonith_t *st, stonith_event_t *event)
+{
+ bool succeeded = true;
+ const char *executioner = "the cluster";
+ const char *client = "a client";
+ const char *reason = NULL;
+ int exec_status;
+
+ if (te_client_id == NULL) {
+ te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
+ (unsigned long) getpid());
+ }
+
+ if (event == NULL) {
+ crm_err("Notify data not found");
+ return;
+ }
+
+ if (event->executioner != NULL) {
+ executioner = event->executioner;
+ }
+ if (event->client_origin != NULL) {
+ client = event->client_origin;
+ }
+
+ exec_status = stonith__event_execution_status(event);
+ if ((stonith__event_exit_status(event) != CRM_EX_OK)
+ || (exec_status != PCMK_EXEC_DONE)) {
+ succeeded = false;
+ if (exec_status == PCMK_EXEC_DONE) {
+ exec_status = PCMK_EXEC_ERROR;
+ }
+ }
+ reason = stonith__event_exit_reason(event);
+
+ crmd_alert_fencing_op(event);
+
+ if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
+ // Unfencing doesn't need special handling, just a log message
+ if (succeeded) {
+ crm_notice("%s was unfenced by %s at the request of %s@%s",
+ event->target, executioner, client, event->origin);
+ } else {
+ crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
+ event->target, executioner,
+ pcmk_exec_status_str(exec_status),
+ ((reason == NULL)? "" : ": "),
+ ((reason == NULL)? "" : reason),
+ stonith__event_exit_status(event));
+ }
+ return;
+ }
+
+ if (succeeded
+ && pcmk__str_eq(event->target, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ /* We were notified of our own fencing. Most likely, either fencing was
+ * misconfigured, or fabric fencing that doesn't cut cluster
+ * communication is in use.
+ *
+ * Either way, shutting down the local host is a good idea, to require
+ * administrator intervention. Also, other nodes would otherwise likely
+ * set our status to lost because of the fencing callback and discard
+ * our subsequent election votes as "not part of our cluster".
+ */
+ crm_crit("We were allegedly just fenced by %s for %s!",
+ executioner, event->origin); // Dumps blackbox if enabled
+ if (fence_reaction_panic) {
+ pcmk__panic(__func__);
+ } else {
+ crm_exit(CRM_EX_FATAL);
+ }
+ return; // Should never get here
+ }
+
+ /* Update the count of fencing failures for this target, in case we become
+ * DC later. The current DC has already updated its fail count in
+ * tengine_stonith_callback().
+ */
+ if (!AM_I_DC) {
+ if (succeeded) {
+ st_fail_count_reset(event->target);
+ } else {
+ st_fail_count_increment(event->target);
+ }
+ }
+
+ crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
+ "%s%s%s%s " CRM_XS " event=%s",
+ event->target, (succeeded? "" : " not"),
+ event->action, executioner, client, event->origin,
+ (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
+ ((reason == NULL)? "" : " ("),
+ ((reason == NULL)? "" : reason),
+ ((reason == NULL)? "" : ")"),
+ event->id);
+
+ if (succeeded) {
+ crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
+ CRM_GET_PEER_ANY);
+ const char *uuid = NULL;
+
+ if (peer == NULL) {
+ return;
+ }
+
+ uuid = crm_peer_uuid(peer);
+
+ if (AM_I_DC) {
+ /* The DC always sends updates */
+ send_stonith_update(NULL, event->target, uuid);
+
+ /* @TODO Ideally, at this point, we'd check whether the fenced node
+ * hosted any guest nodes, and call remote_node_down() for them.
+ * Unfortunately, the controller doesn't have a simple, reliable way
+ * to map hosts to guests. It might be possible to track this in the
+ * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
+ * on the scheduler creating fence pseudo-events for the guests.
+ */
+
+ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
+ /* Abort the current transition if it wasn't the cluster that
+ * initiated fencing.
+ */
+ crm_info("External fencing operation from %s fenced %s",
+ client, event->target);
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "External Fencing Operation", NULL);
+ }
+
+ } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
+ pcmk__str_null_matches|pcmk__str_casei)
+ && !pcmk_is_set(peer->flags, crm_remote_node)) {
+ // Assume the target was our DC if we don't currently have one
+
+ if (controld_globals.dc_name != NULL) {
+ crm_notice("Fencing target %s was our DC", event->target);
+ } else {
+ crm_notice("Fencing target %s may have been our DC",
+ event->target);
+ }
+
+ /* Given the CIB resyncing that occurs around elections,
+ * have one node update the CIB now and, if the new DC is different,
+ * have them do so too after the election
+ */
+ if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ send_stonith_update(NULL, event->target, uuid);
+ }
+ add_stonith_cleanup(event->target);
+ }
+
+ /* If the target is a remote node, and we host its connection,
+ * immediately fail all monitors so it can be recovered quickly.
+ * The connection won't necessarily drop when a remote node is fenced,
+ * so the failure might not otherwise be detected until the next poke.
+ */
+ if (pcmk_is_set(peer->flags, crm_remote_node)) {
+ remote_ra_fail(event->target);
+ }
+
+ crmd_peer_down(peer, TRUE);
+ }
+}
+
+/*!
+ * \brief Connect to fencer
+ *
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ *
+ * \return TRUE
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
+ */
+static gboolean
+te_connect_stonith(gpointer user_data)
+{
+ int rc = pcmk_ok;
+
+ if (stonith_api == NULL) {
+ stonith_api = stonith_api_new();
+ if (stonith_api == NULL) {
+ crm_err("Could not connect to fencer: API memory allocation failed");
+ return TRUE;
+ }
+ }
+
+ if (stonith_api->state != stonith_disconnected) {
+ crm_trace("Already connected to fencer, no need to retry");
+ return TRUE;
+ }
+
+ if (user_data == NULL) {
+ // Blocking (retry failures now until successful)
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 30 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ }
+ } else {
+ // Non-blocking (retry failures later in main loop)
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+ if (rc != pcmk_ok) {
+ if (pcmk_is_set(controld_globals.fsa_input_register,
+ R_ST_REQUIRED)) {
+ crm_notice("Fencer connection failed (will retry): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ mainloop_set_trigger(stonith_reconnect);
+ } else {
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ }
+ return TRUE;
+ }
+ }
+
+ if (rc == pcmk_ok) {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ tengine_stonith_connection_destroy);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_FENCE,
+ handle_fence_notification);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_HISTORY_SYNCED,
+ tengine_stonith_history_synced);
+ te_trigger_stonith_history_sync(TRUE);
+ crm_notice("Fencer successfully connected");
+ }
+
+ return TRUE;
+}
+
+/*!
+ \internal
+ \brief Schedule fencer connection attempt in main loop
+*/
+void
+controld_trigger_fencer_connect(void)
+{
+ if (stonith_reconnect == NULL) {
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
+ te_connect_stonith,
+ GINT_TO_POINTER(TRUE));
+ }
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
+ mainloop_set_trigger(stonith_reconnect);
+}
+
+void
+controld_disconnect_fencer(bool destroy)
+{
+ if (stonith_api) {
+ // Prevent fencer connection from coming up again
+ controld_clear_fsa_input_flags(R_ST_REQUIRED);
+
+ if (stonith_api->state != stonith_disconnected) {
+ stonith_api->cmds->disconnect(stonith_api);
+ }
+ stonith_api->cmds->remove_notification(stonith_api, NULL);
+ }
+ if (destroy) {
+ if (stonith_api) {
+ stonith_api->cmds->free(stonith_api);
+ stonith_api = NULL;
+ }
+ if (stonith_reconnect) {
+ mainloop_destroy_trigger(stonith_reconnect);
+ stonith_reconnect = NULL;
+ }
+ if (te_client_id) {
+ free(te_client_id);
+ te_client_id = NULL;
+ }
+ }
+}
+
+static gboolean
+do_stonith_history_sync(gpointer user_data)
+{
+ if (stonith_api && (stonith_api->state != stonith_disconnected)) {
+ stonith_history_t *history = NULL;
+
+ te_cleanup_stonith_history_sync(stonith_api, FALSE);
+ stonith_api->cmds->history(stonith_api,
+ st_opt_sync_call | st_opt_broadcast,
+ NULL, &history, 5);
+ stonith_history_free(history);
+ return TRUE;
+ } else {
+ crm_info("Skip triggering stonith history-sync as stonith is disconnected");
+ return FALSE;
+ }
+}
+
+static void
+tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+{
+ char *uuid = NULL;
+ int stonith_id = -1;
+ int transition_id = -1;
+ pcmk__graph_action_t *action = NULL;
+ const char *target = NULL;
+
+ if ((data == NULL) || (data->userdata == NULL)) {
+ crm_err("Ignoring fence operation %d result: "
+ "No transition key given (bug?)",
+ ((data == NULL)? -1 : data->call_id));
+ return;
+ }
+
+ if (!AM_I_DC) {
+ const char *reason = stonith__exit_reason(data);
+
+ if (reason == NULL) {
+ reason = pcmk_exec_status_str(stonith__execution_status(data));
+ }
+ crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
+ data->call_id, stonith__exit_status(data), reason,
+ (const char *) data->userdata);
+ return;
+ }
+
+ CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
+ &stonith_id, NULL),
+ goto bail);
+
+ if (controld_globals.transition_graph->complete || (stonith_id < 0)
+ || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
+ || (controld_globals.transition_graph->id != transition_id)) {
+ crm_info("Ignoring fence operation %d result: "
+ "Not from current transition " CRM_XS
+ " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
+ data->call_id,
+ pcmk__btoa(controld_globals.transition_graph->complete),
+ stonith_id, uuid, controld_globals.te_uuid, transition_id,
+ controld_globals.transition_graph->id);
+ goto bail;
+ }
+
+ action = controld_get_action(stonith_id);
+ if (action == NULL) {
+ crm_err("Ignoring fence operation %d result: "
+ "Action %d not found in transition graph (bug?) "
+ CRM_XS " uuid=%s transition=%d",
+ data->call_id, stonith_id, uuid, transition_id);
+ goto bail;
+ }
+
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ if (target == NULL) {
+ crm_err("Ignoring fence operation %d result: No target given (bug?)",
+ data->call_id);
+ goto bail;
+ }
+
+ stop_te_timer(action);
+ if (stonith__exit_status(data) == CRM_EX_OK) {
+ const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ const char *op = crm_meta_value(action->params, "stonith_action");
+
+ crm_info("Fence operation %d for %s succeeded", data->call_id, target);
+ if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
+ te_action_confirmed(action, NULL);
+ if (pcmk__str_eq("on", op, pcmk__str_casei)) {
+ const char *value = NULL;
+ char *now = pcmk__ttoa(time(NULL));
+ gboolean is_remote_node = FALSE;
+
+ /* This check is not 100% reliable, since this node is not
+ * guaranteed to have the remote node cached. However, it
+ * doesn't have to be reliable, since the attribute manager can
+ * learn a node's "remoteness" by other means sooner or later.
+ * This allows it to learn more quickly if this node does have
+ * the information.
+ */
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
+ is_remote_node = TRUE;
+ }
+
+ update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
+ is_remote_node);
+ free(now);
+
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
+ update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
+ is_remote_node);
+
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
+ update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
+ is_remote_node);
+
+ } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
+ send_stonith_update(action, target, uuid);
+ pcmk__set_graph_action_flags(action,
+ pcmk__graph_action_sent_update);
+ }
+ }
+ st_fail_count_reset(target);
+
+ } else {
+ enum pcmk__graph_next abort_action = pcmk__graph_restart;
+ int status = stonith__execution_status(data);
+ const char *reason = stonith__exit_reason(data);
+
+ if (reason == NULL) {
+ if (status == PCMK_EXEC_DONE) {
+ reason = "Agent returned error";
+ } else {
+ reason = pcmk_exec_status_str(status);
+ }
+ }
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+
+ /* If no fence devices were available, there's no use in immediately
+ * checking again, so don't start a new transition in that case.
+ */
+ if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
+ crm_warn("Fence operation %d for %s failed: %s "
+ "(aborting transition and giving up for now)",
+ data->call_id, target, reason);
+ abort_action = pcmk__graph_wait;
+ } else {
+ crm_notice("Fence operation %d for %s failed: %s "
+ "(aborting transition)", data->call_id, target, reason);
+ }
+
+ /* Increment the fail count now, so abort_for_stonith_failure() can
+ * check it. Non-DC nodes will increment it in
+ * handle_fence_notification().
+ */
+ st_fail_count_increment(target);
+ abort_for_stonith_failure(abort_action, target, NULL);
+ }
+
+ pcmk__update_graph(controld_globals.transition_graph, action);
+ trigger_graph();
+
+ bail:
+ free(data->userdata);
+ free(uuid);
+ return;
+}
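+
+/* For illustration: the userdata checked above is a transition key of the
+ * form <action>:<transition>:<target_rc>:<te_uuid>, e.g. (hypothetical
+ * values) "5:11:0:0e26bd35-0a51-496e-b291-1a73c1bd65a2", which ties a fencer
+ * callback to a specific action in a specific transition graph.
+ */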
+
+static int
+fence_with_delay(const char *target, const char *type, int delay)
+{
+ uint32_t options = st_opt_none; // Group of enum stonith_call_options
+ int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
+ / 1000);
+
+ if (crmd_join_phase_count(crm_join_confirmed) == 1) {
+ stonith__set_call_options(options, target, st_opt_allow_suicide);
+ }
+ return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
+ type, timeout_sec, 0, delay);
+}
+
+/*!
+ * \internal
+ * \brief Execute a fencing action from a transition graph
+ *
+ * \param[in] graph Transition graph being executed (ignored)
+ * \param[in] action Fencing action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_execute_fence_action(pcmk__graph_t *graph,
+ pcmk__graph_action_t *action)
+{
+ int rc = 0;
+ const char *id = ID(action->xml);
+ const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *type = crm_meta_value(action->params, "stonith_action");
+ char *transition_key = NULL;
+ const char *priority_delay = NULL;
+ int delay_i = 0;
+ gboolean invalid_action = FALSE;
+ int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
+ / 1000);
+
+ CRM_CHECK(id != NULL, invalid_action = TRUE);
+ CRM_CHECK(uuid != NULL, invalid_action = TRUE);
+ CRM_CHECK(type != NULL, invalid_action = TRUE);
+ CRM_CHECK(target != NULL, invalid_action = TRUE);
+
+ if (invalid_action) {
+ crm_log_xml_warn(action->xml, "BadAction");
+ return EPROTO;
+ }
+
+ priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
+
+ crm_notice("Requesting fencing (%s) targeting node %s "
+ CRM_XS " action=%s timeout=%i%s%s",
+ type, target, id, stonith_timeout,
+ priority_delay ? " priority_delay=" : "",
+ priority_delay ? priority_delay : "");
+
+ /* Passing NULL means block until we can connect... */
+ te_connect_stonith(NULL);
+
+ pcmk__scan_min_int(priority_delay, &delay_i, 0);
+ rc = fence_with_delay(target, type, delay_i);
+ transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, 0,
+                                          controld_globals.te_uuid);
+ stonith_api->cmds->register_callback(stonith_api, rc,
+ (stonith_timeout
+ + (delay_i > 0 ? delay_i : 0)),
+ st_opt_timeout_updates, transition_key,
+ "tengine_stonith_callback",
+ tengine_stonith_callback);
+ return pcmk_rc_ok;
+}
+
+bool
+controld_verify_stonith_watchdog_timeout(const char *value)
+{
+ const char *our_nodename = controld_globals.our_nodename;
+ gboolean rv = TRUE;
+
+ if (stonith_api && (stonith_api->state != stonith_disconnected) &&
+ stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
+ our_nodename)) {
+ rv = pcmk__valid_sbd_timeout(value);
+ }
+ return rv;
+}
+
+/* end stonith API client functions */
+
+
+/*
+ * stonith history synchronization
+ *
+ * Each node's fencer keeps track of a cluster-wide fencing history. When a node
+ * joins or leaves, we need to synchronize the history across all nodes.
+ */
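+/* The sketch below is illustrative only (hypothetical names): it shows how
+ * a client would subscribe to the same fencer notification that
+ * te_cleanup_stonith_history_sync() unregisters further down.
+ */
+#if 0
+static void
+example_history_synced_cb(stonith_t *st, stonith_event_t *event)
+{
+    crm_debug("Fencer reports that the fence history is in sync");
+}
+
+static void
+example_subscribe(stonith_t *st)
+{
+    st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED,
+                                    example_history_synced_cb);
+}
+#endif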
+
+static crm_trigger_t *stonith_history_sync_trigger = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
+
+void
+te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
+{
+ if (free_timers) {
+ mainloop_timer_del(stonith_history_sync_timer_short);
+ stonith_history_sync_timer_short = NULL;
+ mainloop_timer_del(stonith_history_sync_timer_long);
+ stonith_history_sync_timer_long = NULL;
+ } else {
+ mainloop_timer_stop(stonith_history_sync_timer_short);
+ mainloop_timer_stop(stonith_history_sync_timer_long);
+ }
+
+ if (st) {
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
+ }
+}
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
+{
+ te_cleanup_stonith_history_sync(st, FALSE);
+ crm_debug("Fence-history synced - cancel all timers");
+}
+
+static gboolean
+stonith_history_sync_set_trigger(gpointer user_data)
+{
+ mainloop_set_trigger(stonith_history_sync_trigger);
+ return FALSE;
+}
+
+void
+te_trigger_stonith_history_sync(bool long_timeout)
+{
+ /* Trigger a sync in 5s, to give more nodes a chance to show up so
+ * that we don't create unnecessary stonith-history-sync traffic.
+ *
+ * The long timeout of 30s is a fallback: after a successful
+ * connection to fenced, we wait up to 30s for the DC to trigger a
+ * history sync. If that doesn't happen (e.g. fenced segfaulted and
+ * was restarted by pacemakerd), we trigger a sync locally.
+ */
+
+ /* Since do_stonith_history_sync checks the stonith connection
+ * anyway, it is safe to leave stonith_history_sync_trigger and the
+ * sync timers around between syncs.
+ */
+ if (stonith_history_sync_trigger == NULL) {
+ stonith_history_sync_trigger =
+ mainloop_add_trigger(G_PRIORITY_LOW,
+ do_stonith_history_sync, NULL);
+ }
+
+ if (long_timeout) {
+ if (stonith_history_sync_timer_long == NULL) {
+ stonith_history_sync_timer_long =
+ mainloop_timer_add("history_sync_long", 30000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_long);
+ } else {
+ if (stonith_history_sync_timer_short == NULL) {
+ stonith_history_sync_timer_short =
+ mainloop_timer_add("history_sync_short", 5000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_short);
+ }
+
+}
+
+/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
new file mode 100644
index 0000000..86a5050
--- /dev/null
+++ b/daemons/controld/controld_fencing.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_FENCING__H
+# define CONTROLD_FENCING__H
+
+#include <stdbool.h> // bool
+#include <pacemaker-internal.h> // pcmk__graph_t, pcmk__graph_action_t
+
+void controld_configure_fencing(GHashTable *options);
+
+// stonith fail counts
+void st_fail_count_reset(const char * target);
+
+// stonith API client
+void controld_trigger_fencer_connect(void);
+void controld_disconnect_fencer(bool destroy);
+int controld_execute_fence_action(pcmk__graph_t *graph,
+ pcmk__graph_action_t *action);
+bool controld_verify_stonith_watchdog_timeout(const char *value);
+
+// stonith cleanup list
+void add_stonith_cleanup(const char *target);
+void remove_stonith_cleanup(const char *target);
+void purge_stonith_cleanup(void);
+void execute_stonith_cleanup(void);
+
+// stonith history synchronization
+void te_trigger_stonith_history_sync(bool long_timeout);
+void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers);
+
+#endif
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
new file mode 100644
index 0000000..622d1c8
--- /dev/null
+++ b/daemons/controld/controld_fsa.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <stdint.h> // uint64_t
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+//! Triggers an FSA invocation
+static crm_trigger_t *fsa_trigger = NULL;
+
+#define DOT_PREFIX "actions:trace: "
+#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
+
+static void do_state_transition(enum crmd_fsa_state cur_state,
+ enum crmd_fsa_state next_state,
+ fsa_data_t *msg_data);
+
+void s_crmd_fsa_actions(fsa_data_t * fsa_data);
+void log_fsa_input(fsa_data_t * stored_msg);
+void init_dotfile(void);
+
+void
+init_dotfile(void)
+{
+ do_dot_log(DOT_PREFIX "digraph \"g\" {");
+ do_dot_log(DOT_PREFIX " size = \"30,30\"");
+ do_dot_log(DOT_PREFIX " graph [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " node [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " edge [");
+ do_dot_log(DOT_PREFIX " fontsize = \"12\"");
+ do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
+ do_dot_log(DOT_PREFIX " color = \"black\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX "// special nodes");
+ do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
+ do_dot_log(DOT_PREFIX " [");
+ do_dot_log(DOT_PREFIX " color = \"blue\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
+ do_dot_log(DOT_PREFIX " [");
+ do_dot_log(DOT_PREFIX " color = \"red\"");
+ do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
+ do_dot_log(DOT_PREFIX " ]");
+ do_dot_log(DOT_PREFIX "// DC only nodes");
+ do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
+ do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
+}
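+/* Illustration: with tracing enabled, the calls above emit a Graphviz
+ * digraph skeleton (every line carrying the "actions:trace: " prefix),
+ * and do_state_transition() later appends one edge per state change,
+ * along the lines of (origin value hypothetical):
+ *
+ *   actions:trace:   S_ELECTION -> S_INTEGRATION [ label=I_ELECTION_DC
+ *   cause=C_FSA_INTERNAL origin=election_win_cb ]
+ *
+ * so the FSA's history can be reassembled from the logs as a .dot file.
+ */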
+
+static void
+do_fsa_action(fsa_data_t * fsa_data, long long an_action,
+ void (*function) (long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
+{
+ controld_clear_fsa_action_flags(an_action);
+ crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));
+ function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state,
+ fsa_data->fsa_input, fsa_data);
+}
+
+static const uint64_t startup_actions =
+ A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG |
+ A_STARTED | A_CL_JOIN_QUERY;
+
+// A_LOG, A_WARN, A_ERROR
+void
+do_log(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data)
+{
+ unsigned log_type = LOG_TRACE;
+
+ if (action & A_LOG) {
+ log_type = LOG_INFO;
+ } else if (action & A_WARN) {
+ log_type = LOG_WARNING;
+ } else if (action & A_ERROR) {
+ log_type = LOG_ERR;
+ }
+
+ do_crm_log(log_type, "Input %s received in state %s from %s",
+ fsa_input2string(msg_data->fsa_input),
+ fsa_state2string(cur_state), msg_data->origin);
+
+ if (msg_data->data_type == fsa_dt_ha_msg) {
+ ha_msg_input_t *input = fsa_typed_data(msg_data->data_type);
+
+ crm_log_xml_debug(input->msg, __func__);
+
+ } else if (msg_data->data_type == fsa_dt_xml) {
+ xmlNode *input = fsa_typed_data(msg_data->data_type);
+
+ crm_log_xml_debug(input, __func__);
+
+ } else if (msg_data->data_type == fsa_dt_lrm) {
+ lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type);
+
+ do_crm_log(log_type,
+ "Resource %s: Call ID %d returned %d (%d)."
+ " New status if rc=0: %s",
+ input->rsc_id, input->call_id, input->rc,
+ input->op_status, (char *)input->user_data);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Initialize the FSA trigger
+ */
+void
+controld_init_fsa_trigger(void)
+{
+ fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Destroy the FSA trigger
+ */
+void
+controld_destroy_fsa_trigger(void)
+{
+ // This does not actually free the trigger, since the mainloop still holds a reference to it
+ mainloop_destroy_trigger(fsa_trigger);
+ fsa_trigger = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Trigger an FSA invocation
+ *
+ * \param[in] fn Calling function name
+ * \param[in] line Line number where call occurred
+ */
+void
+controld_trigger_fsa_as(const char *fn, int line)
+{
+ if (fsa_trigger != NULL) {
+ crm_trace("%s:%d - Triggered FSA invocation", fn, line);
+ mainloop_set_trigger(fsa_trigger);
+ }
+}
+
+enum crmd_fsa_state
+s_crmd_fsa(enum crmd_fsa_cause cause)
+{
+ controld_globals_t *globals = &controld_globals;
+ fsa_data_t *fsa_data = NULL;
+ uint64_t register_copy = controld_globals.fsa_input_register;
+ uint64_t new_actions = A_NOTHING;
+ enum crmd_fsa_state last_state;
+
+ crm_trace("FSA invoked with Cause: %s\tState: %s",
+ fsa_cause2string(cause),
+ fsa_state2string(globals->fsa_state));
+
+ fsa_dump_actions(controld_globals.fsa_actions, "Initial");
+
+ controld_clear_global_flags(controld_fsa_is_stalled);
+ if ((controld_globals.fsa_message_queue == NULL)
+ && (controld_globals.fsa_actions != A_NOTHING)) {
+ /* fake the first message so we can get into the loop */
+ fsa_data = calloc(1, sizeof(fsa_data_t));
+ fsa_data->fsa_input = I_NULL;
+ fsa_data->fsa_cause = C_FSA_INTERNAL;
+ fsa_data->origin = __func__;
+ fsa_data->data_type = fsa_dt_none;
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
+ fsa_data = NULL;
+ }
+ while ((controld_globals.fsa_message_queue != NULL)
+ && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+ crm_trace("Checking messages (%d remaining)",
+ g_list_length(controld_globals.fsa_message_queue));
+
+ fsa_data = get_message();
+ if (fsa_data == NULL) {
+ continue;
+ }
+
+ log_fsa_input(fsa_data);
+
+ /* add any actions back to the queue */
+ controld_set_fsa_action_flags(fsa_data->actions);
+ fsa_dump_actions(fsa_data->actions, "Restored actions");
+
+ /* get the next batch of actions */
+ new_actions = controld_fsa_get_action(fsa_data->fsa_input);
+ controld_set_fsa_action_flags(new_actions);
+ fsa_dump_actions(new_actions, "New actions");
+
+ if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
+ crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
+ fsa_input2string(fsa_data->fsa_input),
+ fsa_state2string(globals->fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ }
+
+ /* logging : *before* the state is changed */
+ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
+ do_fsa_action(fsa_data, A_ERROR, do_log);
+ }
+ if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
+ do_fsa_action(fsa_data, A_WARN, do_log);
+ }
+ if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
+ do_fsa_action(fsa_data, A_LOG, do_log);
+ }
+
+ /* update state variables */
+ last_state = globals->fsa_state;
+ globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input);
+
+ /*
+ * Remove certain actions during shutdown
+ */
+ if ((globals->fsa_state == S_STOPPING)
+ || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ controld_clear_fsa_action_flags(startup_actions);
+ }
+
+ /*
+ * Hook for change of state.
+ * Allows actions to be added or removed when entering a state
+ */
+ if (last_state != globals->fsa_state) {
+ do_state_transition(last_state, globals->fsa_state, fsa_data);
+ } else {
+ do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
+ " \tInput=%s \tOrigin=%s() \tid=%d",
+ fsa_state2string(globals->fsa_state),
+ fsa_cause2string(fsa_data->fsa_cause),
+ fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
+ }
+
+ /* start doing things... */
+ s_crmd_fsa_actions(fsa_data);
+ delete_fsa_input(fsa_data);
+ fsa_data = NULL;
+ }
+
+ if ((controld_globals.fsa_message_queue != NULL)
+ || (controld_globals.fsa_actions != A_NOTHING)
+ || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+
+ crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s",
+ g_list_length(controld_globals.fsa_message_queue),
+ (unsigned long long) controld_globals.fsa_actions,
+ pcmk__btoa(pcmk_is_set(controld_globals.flags,
+ controld_fsa_is_stalled)));
+ } else {
+ crm_trace("Exiting the FSA");
+ }
+
+ /* cleanup inputs? */
+ if (register_copy != controld_globals.fsa_input_register) {
+ uint64_t same = register_copy & controld_globals.fsa_input_register;
+
+ fsa_dump_inputs(LOG_DEBUG, "Added",
+ controld_globals.fsa_input_register ^ same);
+ fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
+ }
+
+ fsa_dump_actions(controld_globals.fsa_actions, "Remaining");
+ fsa_dump_queue(LOG_DEBUG);
+
+ return globals->fsa_state;
+}
+
+void
+s_crmd_fsa_actions(fsa_data_t * fsa_data)
+{
+ /*
+ * Process actions in order of priority but do only one
+ * action at a time to avoid complicating the ordering.
+ */
+ CRM_CHECK(fsa_data != NULL, return);
+ while ((controld_globals.fsa_actions != A_NOTHING)
+ && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+
+ /* regular action processing in order of action priority
+ *
+ * Make sure all actions that connect to required systems
+ * are performed first
+ */
+ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
+ do_fsa_action(fsa_data, A_ERROR, do_log);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
+ do_fsa_action(fsa_data, A_WARN, do_log);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
+ do_fsa_action(fsa_data, A_LOG, do_log);
+
+ /* get out of here NOW! before anything worse happens */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) {
+ do_fsa_action(fsa_data, A_EXIT_1, do_exit);
+
+ /* sub-system restart */
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_LRM_RECONNECT)) {
+ do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_CIB_RESTART)) {
+ do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_PE_RESTART)) {
+ do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
+
+ } else if (pcmk_all_flags_set(controld_globals.fsa_actions,
+ O_TE_RESTART)) {
+ do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
+
+ /* essential start tasks */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) {
+ do_fsa_action(fsa_data, A_STARTUP, do_startup);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) {
+ do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) {
+ do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) {
+ do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
+
+ /* sub-system start/connect */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) {
+ do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) {
+ do_fsa_action(fsa_data, A_TE_START, do_te_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) {
+ do_fsa_action(fsa_data, A_PE_START, do_pe_control);
+
+ /* Timers */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_INTEGRATE_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_INTEGRATE_TIMER_START)) {
+ do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_FINALIZE_TIMER_STOP)) {
+ do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_FINALIZE_TIMER_START)) {
+ do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
+
+ /*
+ * Highest priority actions
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) {
+ do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) {
+ do_fsa_action(fsa_data, A_RECOVER, do_recover);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_RESULT)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_RESULT,
+ do_cl_join_finalize_respond);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_REQUEST)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_REQUEST,
+ do_cl_join_offer_respond);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) {
+ do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) {
+ do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_COUNT)) {
+ do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) {
+ do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
+
+ /*
+ * High priority actions
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) {
+ do_fsa_action(fsa_data, A_STARTED, do_started);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_TIMER_START)) {
+ do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
+
+ /*
+ * Medium priority actions
+ * - Membership
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) {
+ do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) {
+ do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_CHECK)) {
+ do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_ELECTION_START)) {
+ do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_OFFER_ALL)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_OFFER_ONE)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_PROCESS_REQ)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ,
+ do_dc_join_filter_offer);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_PROCESS_ACK)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_DC_JOIN_FINALIZE)) {
+ do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_CL_JOIN_ANNOUNCE)) {
+ do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
+
+ /*
+ * Low(er) priority actions
+ * Make sure the CIB is always updated before invoking the
+ * scheduler, and the scheduler before the transition engine.
+ */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) {
+ do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) {
+ do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) {
+ do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) {
+ do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) {
+ do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
+
+ /* Shutdown actions */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) {
+ do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) {
+ do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) {
+ do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) {
+ do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
+ } else if (pcmk_is_set(controld_globals.fsa_actions,
+ A_LRM_DISCONNECT)) {
+ do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
+
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) {
+ do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) {
+ do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) {
+ do_fsa_action(fsa_data, A_STOP, do_stop);
+
+ /* exit gracefully */
+ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) {
+ do_fsa_action(fsa_data, A_EXIT_0, do_exit);
+
+ /* Error checking and reporting */
+ } else {
+ crm_err("Action %s not supported "CRM_XS" %#llx",
+ fsa_action2string(controld_globals.fsa_actions),
+ (unsigned long long) controld_globals.fsa_actions);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL,
+ __func__);
+ }
+ }
+}
+
+void
+log_fsa_input(fsa_data_t * stored_msg)
+{
+ CRM_ASSERT(stored_msg);
+ crm_trace("Processing queued input %d", stored_msg->id);
+ if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
+ crm_trace("FSA processing LRM callback from %s", stored_msg->origin);
+
+ } else if (stored_msg->data == NULL) {
+ crm_trace("FSA processing input from %s", stored_msg->origin);
+
+ } else {
+ ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg,
+ __func__);
+
+ crm_trace("FSA processing XML message from %s", stored_msg->origin);
+ crm_log_xml_trace(ha_input->xml, "FSA message data");
+ }
+}
+
+static void
+check_join_counts(fsa_data_t *msg_data)
+{
+ int count;
+ guint npeers;
+
+ count = crmd_join_phase_count(crm_join_finalized);
+ if (count > 0) {
+ crm_err("%d cluster node%s failed to confirm join",
+ count, pcmk__plural_s(count));
+ crmd_join_phase_log(LOG_NOTICE);
+ return;
+ }
+
+ npeers = crm_active_peers();
+ count = crmd_join_phase_count(crm_join_confirmed);
+ if (count == npeers) {
+ if (npeers == 1) {
+ crm_debug("Sole active cluster node is fully joined");
+ } else {
+ crm_debug("All %d active cluster nodes are fully joined", count);
+ }
+
+ } else if (count > npeers) {
+ crm_err("New election needed because more nodes confirmed join "
+ "than are in membership (%d > %u)", count, npeers);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+
+ } else if (controld_globals.membership_id != crm_peer_seq) {
+ crm_info("New join needed because membership changed (%llu -> %llu)",
+ controld_globals.membership_id, crm_peer_seq);
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+
+ } else {
+ crm_warn("Only %d of %u active cluster nodes fully joined "
+ "(%d did not respond to offer)",
+ count, npeers, crmd_join_phase_count(crm_join_welcomed));
+ }
+}
+
+static void
+do_state_transition(enum crmd_fsa_state cur_state,
+ enum crmd_fsa_state next_state, fsa_data_t *msg_data)
+{
+ int level = LOG_INFO;
+ int count = 0;
+ gboolean clear_recovery_bit = TRUE;
+#if 0
+ uint64_t original_fsa_actions = controld_globals.fsa_actions;
+#endif
+
+ enum crmd_fsa_cause cause = msg_data->fsa_cause;
+ enum crmd_fsa_input current_input = msg_data->fsa_input;
+
+ const char *state_from = fsa_state2string(cur_state);
+ const char *state_to = fsa_state2string(next_state);
+ const char *input = fsa_input2string(current_input);
+
+ CRM_LOG_ASSERT(cur_state != next_state);
+
+ do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
+ state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
+
+ if (cur_state == S_IDLE || next_state == S_IDLE) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_ELECTION) {
+ level = LOG_NOTICE;
+ } else if (cur_state == S_STARTING) {
+ level = LOG_NOTICE;
+ } else if (next_state == S_RECOVERY) {
+ level = LOG_WARNING;
+ }
+
+ do_crm_log(level, "State transition %s -> %s "
+ CRM_XS " input=%s cause=%s origin=%s",
+ state_from, state_to, input, fsa_cause2string(cause),
+ msg_data->origin);
+
+ if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
+ controld_stop_current_election_timeout();
+ }
+#if 0
+ if ((controld_globals.fsa_input_register & R_SHUTDOWN)) {
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ }
+#endif
+ if (next_state == S_INTEGRATION) {
+ controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START);
+ } else {
+ controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP);
+ }
+
+ if (next_state == S_FINALIZE_JOIN) {
+ controld_set_fsa_action_flags(A_FINALIZE_TIMER_START);
+ } else {
+ controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP);
+ }
+
+ if (next_state != S_PENDING) {
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ }
+ if (next_state != S_IDLE) {
+ controld_stop_recheck_timer();
+ }
+
+ if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
+ populate_cib_nodes(node_update_quick|node_update_all, __func__);
+ }
+
+ switch (next_state) {
+ case S_PENDING:
+ {
+ cib_t *cib_conn = controld_globals.cib_conn;
+ cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
+ }
+ update_dc(NULL);
+ break;
+
+ case S_ELECTION:
+ update_dc(NULL);
+ break;
+
+ case S_NOT_DC:
+ controld_reset_counter_election_timer();
+ purge_stonith_cleanup();
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
+ controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
+ }
+ CRM_LOG_ASSERT(controld_globals.dc_name != NULL);
+ if (controld_globals.dc_name == NULL) {
+ crm_err("Reached S_NOT_DC without a DC" " being recorded");
+ }
+ break;
+
+ case S_RECOVERY:
+ clear_recovery_bit = FALSE;
+ break;
+
+ case S_FINALIZE_JOIN:
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (cause == C_TIMER_POPPED) {
+ crm_warn("Progressed to state %s after %s",
+ fsa_state2string(next_state), fsa_cause2string(cause));
+ }
+ count = crmd_join_phase_count(crm_join_welcomed);
+ if (count > 0) {
+ crm_warn("%d cluster node%s failed to respond to join offer",
+ count, pcmk__plural_s(count));
+ crmd_join_phase_log(LOG_NOTICE);
+
+ } else {
+ crm_debug("All cluster nodes (%d) responded to join offer",
+ crmd_join_phase_count(crm_join_integrated));
+ }
+ break;
+
+ case S_POLICY_ENGINE:
+ controld_reset_counter_election_timer();
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (cause == C_TIMER_POPPED) {
+ crm_info("Progressed to state %s after %s",
+ fsa_state2string(next_state), fsa_cause2string(cause));
+ }
+ check_join_counts(msg_data);
+ break;
+
+ case S_STOPPING:
+ case S_TERMINATE:
+ /* possibly redundant */
+ controld_set_fsa_input_flags(R_SHUTDOWN);
+ break;
+
+ case S_IDLE:
+ CRM_LOG_ASSERT(AM_I_DC);
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("(Re)Issuing shutdown request now" " that we are the DC");
+ controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
+ }
+ controld_start_recheck_timer();
+ break;
+
+ default:
+ break;
+ }
+
+ if (clear_recovery_bit && next_state != S_PENDING) {
+ controld_clear_fsa_action_flags(A_RECOVER);
+ } else if (clear_recovery_bit == FALSE) {
+ controld_set_fsa_action_flags(A_RECOVER);
+ }
+
+#if 0
+ if (original_fsa_actions != controld_globals.fsa_actions) {
+ fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions,
+ "New actions");
+ }
+#endif
+}
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
new file mode 100644
index 0000000..2b79f07
--- /dev/null
+++ b/daemons/controld/controld_fsa.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_FSA__H
+# define CRMD_FSA__H
+
+# include <crm/crm.h>
+# include <crm/cib.h>
+# include <crm/common/xml.h>
+# include <crm/common/mainloop.h>
+# include <crm/cluster.h>
+# include <crm/cluster/election_internal.h>
+# include <crm/common/ipc_internal.h>
+
+/*! States the controller can be in */
+enum crmd_fsa_state {
+ S_IDLE = 0, /* Nothing happening */
+
+ S_ELECTION, /* Take part in the election algorithm as
+ * described below
+ */
+ S_INTEGRATION, /* Integrate the status of new nodes (which is
+ * all of them if we have just been elected DC)
+ * to form a complete and up-to-date picture of
+ * the CIB
+ */
+ S_FINALIZE_JOIN, /* Sync the joining nodes with the CIB, and wait
+ * for them to confirm their membership, which
+ * completes the join process
+ */
+ S_NOT_DC, /* we are in non-DC mode */
+ S_POLICY_ENGINE, /* Determine next stable state of the cluster */
+ S_RECOVERY, /* Something bad happened, check everything is ok
+ * before continuing and attempt to recover if
+ * required
+ */
+ S_RELEASE_DC, /* we were the DC, but now we aren't anymore,
+ * possibly by our own request, and we should
+ * release all unnecessary sub-systems, finish
+ * any pending actions, do general cleanup and
+ * unset anything that makes us think we are
+ * special :)
+ */
+ S_STARTING, /* we are just starting out */
+ S_PENDING, /* we are not a full/active member yet */
+ S_STOPPING, /* We are in the final stages of shutting down */
+ S_TERMINATE, /* We are going to shut down; this is the equivalent
+ * of "Sending TERM signal to all processes" in Linux
+ * and in worst-case scenarios could be considered a
+ * self-STONITH
+ */
+ S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable
+ * state of the cluster a reality
+ */
+
+ S_HALT, /* Freeze - don't do anything
+ * Something bad happened that needs the admin to fix
+ * Wait for I_ELECTION
+ */
+
+ /* ----------- Last input found in table is above ---------- */
+ S_ILLEGAL /* This is an illegal FSA state */
+ /* (must be last) */
+};
+
+# define MAXSTATE S_ILLEGAL
+
+/*
+ Once we start and do some basic sanity checks, we go into the
+ S_NOT_DC state and await instructions from the DC or input from
+ the cluster layer which indicates the election algorithm needs to run.
+
+ If the election algorithm is triggered, we enter the S_ELECTION state
+ from where we can either go back to the S_NOT_DC state or progress
+ to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
+ but aren't anymore). See the libcrmcluster API documentation for more
+ information about the election algorithm.
+
+ Once the election is complete, if we are the DC, we enter the
+ S_INTEGRATION state which is a DC-in-waiting style state. We are
+ the DC, but we shouldn't do anything yet because we may not have an
+ up-to-date picture of the cluster. There may of course be times
+ when this fails, so we should go back to the S_RECOVERY stage and
+ check everything is ok. We may also end up here if a new node came
+ online, since each node is authoritative about itself, and we would want
+ to incorporate its information into the CIB.
+
+   Once we have the latest CIB, we then enter the S_POLICY_ENGINE state,
+   where we invoke the scheduler. It is possible that between invoking
+   the scheduler and receiving an answer, we receive more input. In that
+   case, we discard the original result and invoke the scheduler again.
+
+   Once we are satisfied with the output from the scheduler, we enter
+   S_TRANSITION_ENGINE and feed the scheduler's output to the transition
+   engine, which attempts to make the scheduler's calculation a reality.
+   If the transition completes successfully, we enter S_IDLE; otherwise,
+   we go back to S_POLICY_ENGINE with the current unstable state and try
+   again.
+
+   Of course, we may be asked to shut down at any time; however, we must
+ progress to S_NOT_DC before doing so. Once we have handed over DC
+ duties to another node, we can then shut down like everyone else,
+ that is, by asking the DC for permission and waiting for it to take all
+ our resources away.
+
+ The case where we are the DC and the only node in the cluster is a
+ special case and handled as an escalation which takes us to
+ S_SHUTDOWN. Similarly, if any other point in the shutdown
+ fails or stalls, this is escalated and we end up in S_TERMINATE.
+
+ At any point, the controller can relay messages for its subsystems,
+ but outbound messages (from subsystems) should probably be blocked
+ until S_INTEGRATION (for the DC) or the join protocol has
+ completed (for non-DC controllers).
+*/
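+/* A minimal sketch (hypothetical and heavily simplified) of the dispatch
+ * model described above: each (state, input) pair maps to a next state plus
+ * a set of pending action bits, which are then executed in priority order.
+ */
+#if 0
+struct example_transition {
+    enum crmd_fsa_state state;
+    enum crmd_fsa_input input;
+    enum crmd_fsa_state next_state;
+    uint64_t actions;
+};
+
+static enum crmd_fsa_state
+example_step(enum crmd_fsa_state state, enum crmd_fsa_input input,
+             const struct example_transition *table, size_t n_entries,
+             uint64_t *pending_actions)
+{
+    for (size_t i = 0; i < n_entries; i++) {
+        if ((table[i].state == state) && (table[i].input == input)) {
+            *pending_actions |= table[i].actions;
+            return table[i].next_state;
+        }
+    }
+    return state; // No matching transition: remain in the current state
+}
+#endif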
+
+/*======================================
+ *
+ * Inputs/Events/Stimuli to be given to the finite state machine
+ *
+ * Some of these are true events, and others are synthesized based on
+ * the "register" (see below) and the contents or source of messages.
+ *
+ * The machine keeps processing until receiving I_NULL
+ *
+ *======================================*/
+enum crmd_fsa_input {
+/* 0 */
+ I_NULL, /* Nothing happened */
+/* 1 */
+
+ I_CIB_OP, /* An update to the CIB occurred */
+ I_CIB_UPDATE, /* An update to the CIB occurred */
+ I_DC_TIMEOUT, /* We have lost communication with the DC */
+ I_ELECTION, /* Someone started an election */
+ I_PE_CALC, /* The scheduler needs to be invoked */
+ I_RELEASE_DC, /* The election completed and we were not
+ * elected, but we were the DC beforehand
+ */
+ I_ELECTION_DC, /* The election completed and we were (re-)elected
+ * DC
+ */
+ I_ERROR, /* Something bad happened (more serious than
+ * I_FAIL) and may not have been due to the action
+ * being performed. For example, we may have lost
+ * our connection to the CIB.
+ */
+/* 9 */
+ I_FAIL, /* The action failed to complete successfully */
+ I_INTEGRATED,
+ I_FINALIZED,
+ I_NODE_JOIN, /* A node has entered the cluster */
+ I_NOT_DC, /* We are not and were not the DC before or after
+ * the current operation or state
+ */
+ I_RECOVERED, /* The recovery process completed successfully */
+ I_RELEASE_FAIL, /* We could not give up DC status for some reason
+ */
+ I_RELEASE_SUCCESS, /* We are no longer the DC */
+ I_RESTART, /* The current set of actions needs to be
+ * restarted
+ */
+ I_TE_SUCCESS, /* The transition engine completed the transition
+ * successfully
+ */
+/* 20 */
+ I_ROUTER, /* Do our job as router and forward this to the
+ * right place
+ */
+ I_SHUTDOWN, /* We are asking to shut down */
+ I_STOP, /* We have been told to shut down */
+ I_TERMINATE, /* Actually exit */
+ I_STARTUP,
+ I_PE_SUCCESS, /* The scheduler calculation completed successfully */
+
+ I_JOIN_OFFER, /* The DC is offering membership */
+ I_JOIN_REQUEST, /* The client is requesting membership */
+ I_JOIN_RESULT, /* If not the DC: The result of a join request
+ * Else: A client is responding with its local state info
+ */
+
+ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen"
+ * and until it does, we can't do anything else
+ */
+
+ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */
+
+ I_LRM_EVENT,
+
+/* 30 */
+ I_PENDING,
+ I_HALT,
+
+ /* ------------ Last input found in table is above ----------- */
+ I_ILLEGAL /* This is an illegal value for an FSA input */
+ /* (must be last) */
+};
+
+# define MAXINPUT I_ILLEGAL
+
+# define I_MESSAGE I_ROUTER
+
+/*======================================
+ *
+ * actions
+ *
+ * Some of the actions below will always occur together for now, but this may
+ * not always be the case, so they are split up so that they can easily be
+ * called independently in the future, if necessary.
+ *
+ * For example, separating A_LRM_CONNECT from A_STARTUP might be useful
+ * if we ever try to recover from a faulty or disconnected executor.
+ *
+ *======================================*/
+
+ /* Don't do anything */
+# define A_NOTHING 0x0000000000000000ULL
+
+/* -- Startup actions -- */
+ /* Hook to perform any actions (other than connecting to other daemons)
+ * that might be needed as part of the startup.
+ */
+# define A_STARTUP 0x0000000000000001ULL
+ /* Hook to perform any actions that might be needed after
+ * startup has completed successfully.
+ */
+# define A_STARTED 0x0000000000000002ULL
+ /* Connect to cluster layer */
+# define A_HA_CONNECT 0x0000000000000004ULL
+# define A_HA_DISCONNECT 0x0000000000000008ULL
+
+# define A_INTEGRATE_TIMER_START 0x0000000000000010ULL
+# define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL
+# define A_FINALIZE_TIMER_START 0x0000000000000040ULL
+# define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL
+
+/* -- Election actions -- */
+# define A_DC_TIMER_START 0x0000000000000100ULL
+# define A_DC_TIMER_STOP 0x0000000000000200ULL
+# define A_ELECTION_COUNT 0x0000000000000400ULL
+# define A_ELECTION_VOTE 0x0000000000000800ULL
+
+# define A_ELECTION_START 0x0000000000001000ULL
+
+/* -- Message processing -- */
+ /* Process the queue of requests */
+# define A_MSG_PROCESS 0x0000000000002000ULL
+ /* Send the message to the correct recipient */
+# define A_MSG_ROUTE 0x0000000000004000ULL
+
+ /* Send a welcome message to new node(s) */
+# define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL
+
+/* -- Server Join protocol actions -- */
+ /* Send a welcome message to all nodes */
+# define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL
+ /* Process a node's request to join (its reply to our offer) */
+# define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL
+ /* Send out the results of the Join phase */
+# define A_DC_JOIN_FINALIZE 0x0000000000040000ULL
+ /* Process a node's ack of the join results */
+# define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL
+
+/* -- Client Join protocol actions -- */
+# define A_CL_JOIN_QUERY 0x0000000000100000ULL
+# define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL
+ /* Request membership to the DC list */
+# define A_CL_JOIN_REQUEST 0x0000000000400000ULL
+ /* Did the DC accept or reject the request */
+# define A_CL_JOIN_RESULT 0x0000000000800000ULL
+
+/* -- Recovery, DC start/stop -- */
+ /* Something bad happened, try to recover */
+# define A_RECOVER 0x0000000001000000ULL
+ /* Hook to perform any actions (apart from stopping the TE and
+ * scheduler) that might be necessary before giving up the
+ * responsibilities of being the DC.
+ */
+# define A_DC_RELEASE 0x0000000002000000ULL
+ /* The DC responsibilities have been released */
+# define A_DC_RELEASED 0x0000000004000000ULL
+ /* Hook to perform any actions (apart from starting the TE and
+ * scheduler, and gathering the latest CIB) that might be necessary
+ * before taking over the responsibilities of being the DC.
+ */
+# define A_DC_TAKEOVER 0x0000000008000000ULL
+
+/* -- Shutdown actions -- */
+# define A_SHUTDOWN 0x0000000010000000ULL
+# define A_STOP 0x0000000020000000ULL
+# define A_EXIT_0 0x0000000040000000ULL
+# define A_EXIT_1 0x0000000080000000ULL
+
+# define A_SHUTDOWN_REQ 0x0000000100000000ULL
+# define A_ELECTION_CHECK 0x0000000200000000ULL
+# define A_DC_JOIN_FINAL 0x0000000400000000ULL
+
+/* -- CIB actions -- */
+# define A_CIB_START 0x0000020000000000ULL
+# define A_CIB_STOP 0x0000040000000000ULL
+
+/* -- Transition Engine actions -- */
+ /* Attempt to reach the newly calculated cluster state. This is
+ * only called once per transition (except if it is asked to
+ * stop the transition or start a new one).
+ * Once given a cluster state to reach, the TE will determine
+ * tasks that can be performed in parallel, execute them, wait
+ * for replies and then determine the next set until the new
+ * state is reached or no further tasks can be taken.
+ */
+# define A_TE_INVOKE 0x0000100000000000ULL
+# define A_TE_START 0x0000200000000000ULL
+# define A_TE_STOP 0x0000400000000000ULL
+# define A_TE_CANCEL 0x0000800000000000ULL
+# define A_TE_HALT 0x0001000000000000ULL
+
+/* -- Scheduler actions -- */
+ /* Calculate the next state for the cluster. This is only
+ * invoked once per needed calculation.
+ */
+# define A_PE_INVOKE 0x0002000000000000ULL
+# define A_PE_START 0x0004000000000000ULL
+# define A_PE_STOP 0x0008000000000000ULL
+/* -- Misc actions -- */
+ /* Add a system-generated "block" so that resources aren't moved
+ * to, or are actively moved away from, the affected node. This
+ * way we can return quickly even if busy with other things.
+ */
+# define A_NODE_BLOCK 0x0010000000000000ULL
+ /* Update our information in the local CIB */
+# define A_UPDATE_NODESTATUS 0x0020000000000000ULL
+# define A_READCONFIG 0x0080000000000000ULL
+
+/* -- LRM Actions -- */
+ /* Connect to pacemaker-execd */
+# define A_LRM_CONNECT 0x0100000000000000ULL
+ /* Disconnect from pacemaker-execd */
+# define A_LRM_DISCONNECT 0x0200000000000000ULL
+# define A_LRM_INVOKE 0x0400000000000000ULL
+# define A_LRM_EVENT 0x0800000000000000ULL
+
+/* -- Logging actions -- */
+# define A_LOG 0x1000000000000000ULL
+# define A_ERROR 0x2000000000000000ULL
+# define A_WARN 0x4000000000000000ULL
+
+# define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
+# define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
+# define O_PE_RESTART (A_PE_START|A_PE_STOP)
+# define O_TE_RESTART (A_TE_START|A_TE_STOP)
+# define O_CIB_RESTART (A_CIB_START|A_CIB_STOP)
+# define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT)
+# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START)
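+/* For illustration (hypothetical function name): the composite O_* values
+ * let a caller request a whole subsystem restart with one flag set, which
+ * the dispatcher then recognizes with pcmk_all_flags_set(), as
+ * s_crmd_fsa_actions() does.
+ */
+#if 0
+static void
+example_restart_executor_connection(void)
+{
+    // Sets both A_LRM_CONNECT and A_LRM_DISCONNECT at once
+    controld_set_fsa_action_flags(O_LRM_RECONNECT);
+
+    // Later, the dispatcher checks for the whole group:
+    if (pcmk_all_flags_set(controld_globals.fsa_actions, O_LRM_RECONNECT)) {
+        // ... disconnect from the executor, then reconnect ...
+    }
+}
+#endif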
+/*======================================
+ *
+ * "register" contents
+ *
+ * Things we may want to remember regardless of which state we are in.
+ *
+ * These also count as inputs for synthesizing I_*
+ *
+ *======================================*/
+# define R_THE_DC 0x00000001ULL
+ /* Are we the DC? */
+# define R_STARTING 0x00000002ULL
+ /* Are we starting up? */
+# define R_SHUTDOWN 0x00000004ULL
+ /* Are we trying to shut down? */
+# define R_STAYDOWN 0x00000008ULL
+ /* Should we restart? */
+
+# define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */
+# define R_READ_CONFIG 0x00000040ULL
+# define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked?
+
+# define R_CIB_CONNECTED 0x00000100ULL
+ /* Is the CIB connected? */
+# define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected?
+# define R_TE_CONNECTED 0x00000400ULL
+ /* Is the Transition Engine connected? */
+# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected?
+
+# define R_CIB_REQUIRED 0x00001000ULL
+ /* Is the CIB required? */
+# define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required?
+# define R_TE_REQUIRED 0x00004000ULL
+ /* Is the Transition Engine required? */
+# define R_ST_REQUIRED 0x00008000ULL
+ /* Is the Stonith daemon required? */
+
+# define R_CIB_DONE 0x00010000ULL
+ /* Have we calculated the CIB? */
+# define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */
+
+# define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */
+# define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */
+
+# define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */
+
+# define R_REQ_PEND 0x01000000ULL // Are there requests awaiting processing?
+# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler?
+# define R_TE_PEND 0x04000000ULL // Has the TE been invoked and not yet completed?
+# define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a
+ * response? If so, perhaps we
+ * shouldn't stop yet */
+
+# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all
+ * resources in preparation for
+ * shutting down */
+
+# define R_IN_RECOVERY 0x80000000ULL
+
+#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID)
+
+enum crmd_fsa_cause {
+ C_UNKNOWN = 0,
+ C_STARTUP,
+ C_IPC_MESSAGE,
+ C_HA_MESSAGE,
+ C_CRMD_STATUS_CALLBACK,
+ C_LRM_OP_CALLBACK,
+ C_TIMER_POPPED,
+ C_SHUTDOWN,
+ C_FSA_INTERNAL,
+};
+
+enum fsa_data_type {
+ fsa_dt_none,
+ fsa_dt_ha_msg,
+ fsa_dt_xml,
+ fsa_dt_lrm,
+};
+
+typedef struct fsa_data_s fsa_data_t;
+struct fsa_data_s {
+ int id;
+ enum crmd_fsa_input fsa_input;
+ enum crmd_fsa_cause fsa_cause;
+ uint64_t actions;
+ const char *origin;
+ void *data;
+ enum fsa_data_type data_type;
+};
+
+#define controld_set_fsa_input_flags(flags_to_set) do { \
+ controld_globals.fsa_input_register \
+ = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "FSA input", "controller", \
+ controld_globals.fsa_input_register, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_fsa_input_flags(flags_to_clear) do { \
+ controld_globals.fsa_input_register \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "FSA input", "controller", \
+ controld_globals.fsa_input_register, \
+ (flags_to_clear), \
+ #flags_to_clear); \
+ } while (0)
+
+#define controld_set_fsa_action_flags(flags_to_set) do { \
+ controld_globals.fsa_actions \
+ = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \
+ "FSA action", "controller", \
+ controld_globals.fsa_actions, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_fsa_action_flags(flags_to_clear) do { \
+ controld_globals.fsa_actions \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \
+ "FSA action", "controller", \
+ controld_globals.fsa_actions, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
+// This should be moved elsewhere
+xmlNode *controld_query_executor_state(void);
+
+const char *fsa_input2string(enum crmd_fsa_input input);
+const char *fsa_state2string(enum crmd_fsa_state state);
+const char *fsa_cause2string(enum crmd_fsa_cause cause);
+const char *fsa_action2string(long long action);
+
+enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause);
+
+enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input);
+
+uint64_t controld_fsa_get_action(enum crmd_fsa_input input);
+
+void controld_init_fsa_trigger(void);
+void controld_destroy_fsa_trigger(void);
+
+void free_max_generation(void);
+
+# define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)
+# define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__)
+
+void controld_trigger_fsa_as(const char *fn, int line);
+
+/* A_READCONFIG */
+void do_read_config(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_PE_INVOKE */
+void do_pe_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_LOG */
+void do_log(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STARTUP */
+void do_startup(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_CIB_START, STOP, RESTART */
+void do_cib_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_HA_CONNECT */
+void do_ha_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_LRM_CONNECT */
+void do_lrm_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_PE_START, STOP, RESTART */
+void do_pe_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_TE_START, STOP, RESTART */
+void do_te_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STARTED */
+void do_started(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_MSG_ROUTE */
+void do_msg_route(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_RECOVER */
+void do_recover(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_ELECTION_VOTE */
+void do_election_vote(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_ELECTION_COUNT */
+void do_election_count_vote(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input,
+ fsa_data_t *msg_data);
+
+/* A_ELECTION_CHECK */
+void do_election_check(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_TIMER_STOP */
+void do_timer_control(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_TAKEOVER */
+void do_dc_takeover(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_RELEASE */
+void do_dc_release(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_OFFER_ALL */
+void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_OFFER_ONE */
+void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_ACK */
+void do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_REQ */
+void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input,
+ fsa_data_t *msg_data);
+
+/* A_DC_JOIN_FINALIZE */
+void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_QUERY */
+/* is there a DC out there? */
+void do_cl_join_query(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_ANNOUNCE */
+void do_cl_join_announce(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+
+/* A_CL_JOIN_REQUEST */
+void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input,
+ fsa_data_t *msg_data);
+
+/* A_CL_JOIN_RESULT */
+void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input,
+ fsa_data_t *msg_data);
+
+/* A_LRM_INVOKE */
+void do_lrm_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_LRM_EVENT */
+void do_lrm_event(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_TE_INVOKE, A_TE_CANCEL */
+void do_te_invoke(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_SHUTDOWN_REQ */
+void do_shutdown_req(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_SHUTDOWN */
+void do_shutdown(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_STOP */
+void do_stop(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_EXIT_0, A_EXIT_1 */
+void do_exit(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
+
+/* A_DC_JOIN_FINAL */
+void do_dc_join_final(long long action, enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t *msg_data);
+#endif
diff --git a/daemons/controld/controld_globals.h b/daemons/controld/controld_globals.h
new file mode 100644
index 0000000..eff1607
--- /dev/null
+++ b/daemons/controld/controld_globals.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2022-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_GLOBALS__H
+# define CONTROLD_GLOBALS__H
+
+#include <crm_internal.h> // pcmk__output_t, etc.
+
+#include <stdint.h> // uint32_t, uint64_t
+#include <glib.h> // GList, GMainLoop
+#include <crm/cib.h> // cib_t
+#include <pacemaker-internal.h> // pcmk__graph_t
+#include <controld_fsa.h> // enum crmd_fsa_state
+
+typedef struct {
+ // Booleans
+
+ //! Group of \p controld_flags values
+ uint32_t flags;
+
+
+ // Controller FSA
+
+ //! FSA state
+ enum crmd_fsa_state fsa_state;
+
+ //! FSA actions (group of \p A_* flags)
+ uint64_t fsa_actions;
+
+ //! FSA input register contents (group of \p R_* flags)
+ uint64_t fsa_input_register;
+
+ //! FSA message queue
+ GList *fsa_message_queue;
+
+
+ // CIB
+
+ //! Connection to the CIB
+ cib_t *cib_conn;
+
+ //! CIB connection's client ID
+ const char *cib_client_id;
+
+
+ // Scheduler
+
+ //! Reference of the scheduler request being waited on
+ char *fsa_pe_ref;
+
+
+ // Transitioner
+
+ //! Transitioner UUID
+ char *te_uuid;
+
+ //! Graph of transition currently being processed
+ pcmk__graph_t *transition_graph;
+
+
+ // Logging
+
+ //! Output object for controller log messages
+ pcmk__output_t *logger_out;
+
+
+ // Other
+
+ //! Cluster name
+ char *cluster_name;
+
+ //! Designated controller name
+ char *dc_name;
+
+ //! Designated controller's Pacemaker version
+ char *dc_version;
+
+ //! Local node's node name
+ char *our_nodename;
+
+ //! Local node's UUID
+ char *our_uuid;
+
+ //! Last saved cluster communication layer membership ID
+ unsigned long long membership_id;
+
+ //! Max lifetime (in seconds) of a resource's shutdown lock to a node
+ guint shutdown_lock_limit;
+
+ //! Main event loop
+ GMainLoop *mainloop;
+} controld_globals_t;
+
+extern controld_globals_t controld_globals;
+
+/*!
+ * \internal
+ * \enum controld_flags
+ * \brief Bit flags to store various controller state and configuration info
+ */
+enum controld_flags {
+ //! The DC left in a membership change that is being processed
+ controld_dc_left = (1 << 0),
+
+ //! The FSA is stalled waiting for further input
+ controld_fsa_is_stalled = (1 << 1),
+
+ //! The local node has been in a quorate partition at some point
+ controld_ever_had_quorum = (1 << 2),
+
+ //! The local node is currently in a quorate partition
+ controld_has_quorum = (1 << 3),
+
+ //! Panic the local node if it loses quorum
+ controld_no_quorum_suicide = (1 << 4),
+
+ //! Lock resources to the local node when it shuts down cleanly
+ controld_shutdown_lock_enabled = (1 << 5),
+};
+
+# define controld_set_global_flags(flags_to_set) do { \
+ controld_globals.flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, \
+ "Global", "controller", \
+ controld_globals.flags, \
+ (flags_to_set), \
+ #flags_to_set); \
+ } while (0)
+
+# define controld_clear_global_flags(flags_to_clear) do { \
+ controld_globals.flags \
+ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Global", \
+ "controller", controld_globals.flags, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
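+
+/* Illustrative usage (a sketch, not part of the original header): these
+ * wrappers log each flag change at trace level, along with the calling
+ * function and line. A caller tracking quorum might do, for example
+ * ("quorate" is a stand-in boolean):
+ *
+ *     if (quorate) {
+ *         controld_set_global_flags(controld_has_quorum
+ *                                   |controld_ever_had_quorum);
+ *     } else {
+ *         controld_clear_global_flags(controld_has_quorum);
+ *     }
+ */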
+
+#endif // ifndef CONTROLD_GLOBALS__H
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
new file mode 100644
index 0000000..da6a9d6
--- /dev/null
+++ b/daemons/controld/controld_join_client.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
+
+extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
+
+/*!
+ * \internal
+ * \brief Remember if DC is shutting down as we join
+ *
+ * If we're joining while the current DC is shutting down, update its expected
+ * state, so we don't fence it if we become the new DC. (We weren't a peer
+ * when it broadcast its shutdown request.)
+ *
+ * \param[in] msg A join message from the DC
+ */
+static void
+update_dc_expected(const xmlNode *msg)
+{
+ if ((controld_globals.dc_name != NULL)
+ && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) {
+ crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name);
+
+ pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN);
+ }
+}
+
+/* A_CL_JOIN_QUERY */
+/* is there a DC out there? */
+void
+do_cl_join_query(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+
+ sleep(1); // Give the cluster layer time to propagate to the DC
+ update_dc(NULL); /* Unset any existing value so that the result is not discarded */
+ crm_debug("Querying for a DC");
+ send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
+ free_xml(req);
+}
+
+/* A_CL_JOIN_ANNOUNCE */
+
+/* This is kind of a workaround for the fact that we may not be around, or
+ * may otherwise be unable to reply, when the DC sends out A_DC_JOIN_OFFER_ALL
+ */
+void
+do_cl_join_announce(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    /* Only announce availability while waiting to join (i.e. in S_PENDING) */
+ if (cur_state != S_PENDING) {
+ crm_warn("Not announcing cluster join because in state %s",
+ fsa_state2string(cur_state));
+ return;
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
+ /* send as a broadcast */
+ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+
+ crm_debug("Announcing availability");
+ update_dc(NULL);
+ send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
+ free_xml(req);
+
+ } else {
+ /* Delay announce until we have finished local startup */
+ crm_warn("Delaying announce of cluster join until local startup is complete");
+ return;
+ }
+}
+
+static int query_call_id = 0;
+
+/* A_CL_JOIN_REQUEST */
+/* aka. accept the welcome offer */
+void
+do_cl_join_offer_respond(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *welcome_from;
+ const char *join_id;
+
+ CRM_CHECK(input != NULL, return);
+
+#if 0
+ if (we are sick) {
+ log error;
+
+ /* save the request for later? */
+ return;
+ }
+#endif
+
+ welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
+ join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
+ crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
+              welcome_from, join_id);
+
+ /* we only ever want the last one */
+ if (query_call_id > 0) {
+ crm_trace("Cancelling previous join query: %d", query_call_id);
+ remove_cib_op_callback(query_call_id, FALSE);
+ query_call_id = 0;
+ }
+
+ if (update_dc(input->msg) == FALSE) {
+ crm_warn("Discarding cluster join offer from node %s (expected %s)",
+ welcome_from, controld_globals.dc_name);
+ return;
+ }
+
+ update_dc_expected(input->msg);
+
+ query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL,
+ cib_scope_local|cib_no_children);
+ fsa_register_cib_callback(query_call_id, strdup(join_id),
+ join_query_callback);
+ crm_trace("Registered join query callback: %d", query_call_id);
+
+ controld_set_fsa_action_flags(A_DC_TIMER_STOP);
+ controld_trigger_fsa();
+}
+
+void
+join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ char *join_id = user_data;
+ xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
+
+ CRM_LOG_ASSERT(join_id != NULL);
+
+ if (query_call_id != call_id) {
+ crm_trace("Query %d superseded", call_id);
+ goto done;
+ }
+
+ query_call_id = 0;
+ if(rc != pcmk_ok || output == NULL) {
+ crm_err("Could not retrieve version details for join-%s: %s (%d)",
+ join_id, pcmk_strerror(rc), rc);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+
+ } else if (controld_globals.dc_name == NULL) {
+ crm_debug("Membership is in flux, not continuing join-%s", join_id);
+
+ } else {
+ xmlNode *reply = NULL;
+
+ crm_debug("Respond to join offer join-%s from %s",
+ join_id, controld_globals.dc_name);
+ copy_in_properties(generation, output);
+
+ reply = create_request(CRM_OP_JOIN_REQUEST, generation,
+ controld_globals.dc_name, CRM_SYSTEM_DC,
+ CRM_SYSTEM_CRMD, NULL);
+
+ crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
+ crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ send_cluster_message(crm_get_peer(0, controld_globals.dc_name),
+ crm_msg_crmd, reply, TRUE);
+ free_xml(reply);
+ }
+
+ done:
+ free_xml(generation);
+}
+
+static void
+set_join_state(const char * start_state)
+{
+ if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) {
+ crm_notice("Forcing node %s to join in %s state per configured "
+ "environment", controld_globals.our_nodename, start_state);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_sync_call,
+ XML_CIB_TAG_NODES, controld_globals.our_uuid,
+ NULL, NULL, NULL, "standby", "on", NULL, NULL);
+
+ } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) {
+ crm_notice("Forcing node %s to join in %s state per configured "
+ "environment", controld_globals.our_nodename, start_state);
+ cib__update_node_attr(controld_globals.logger_out,
+ controld_globals.cib_conn, cib_sync_call,
+ XML_CIB_TAG_NODES, controld_globals.our_uuid,
+ NULL, NULL, NULL, "standby", "off", NULL, NULL);
+
+ } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) {
+ crm_debug("Not forcing a starting state on node %s",
+ controld_globals.our_nodename);
+
+ } else {
+ crm_warn("Unrecognized start state '%s', using 'default' (%s)",
+ start_state, controld_globals.our_nodename);
+ }
+}
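+
+/* A configuration sketch (environment syntax, not C; not part of the
+ * original file): the start state handled above comes from the
+ * PCMK_node_start_state option in Pacemaker's environment file, e.g.
+ *
+ *     # /etc/sysconfig/pacemaker (path varies by distribution)
+ *     PCMK_node_start_state=standby
+ */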
+
+static int
+update_conn_host_cache(xmlNode *node, void *userdata)
+{
+ const char *remote = crm_element_value(node, XML_ATTR_ID);
+ const char *conn_host = crm_element_value(node, PCMK__XA_CONN_HOST);
+ const char *state = crm_element_value(node, XML_CIB_TAG_STATE);
+
+ crm_node_t *remote_peer = crm_remote_peer_get(remote);
+
+ if (remote_peer == NULL) {
+ return pcmk_rc_ok;
+ }
+
+ if (conn_host != NULL) {
+ pcmk__str_update(&remote_peer->conn_host, conn_host);
+ }
+
+ if (state != NULL) {
+ pcmk__update_peer_state(__func__, remote_peer, state, 0);
+ }
+
+ return pcmk_rc_ok;
+}
+
+/* A_CL_JOIN_RESULT */
+/* aka. this is notification that we have (or have not) been accepted */
+void
+do_cl_join_finalize_respond(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *tmp1 = NULL;
+ gboolean was_nack = TRUE;
+ static gboolean first_join = TRUE;
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
+
+ int join_id = -1;
+ const char *op = crm_element_value(input->msg, F_CRM_TASK);
+ const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
+
+ if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) {
+ crm_trace("Ignoring op=%s message", op);
+ return;
+ }
+
+ /* calculate if it was an ack or a nack */
+ if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) {
+ was_nack = FALSE;
+ }
+
+ crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
+
+ if (was_nack) {
+ crm_err("Shutting down because cluster join with leader %s failed "
+ CRM_XS" join-%d NACK'd", welcome_from, join_id);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ return;
+ }
+
+ if (!AM_I_DC
+ && pcmk__str_eq(welcome_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ crm_warn("Discarding our own welcome - we're no longer the DC");
+ return;
+ }
+
+ if (update_dc(input->msg) == FALSE) {
+ crm_warn("Discarding %s from node %s (expected from %s)",
+ op, welcome_from, controld_globals.dc_name);
+ return;
+ }
+
+ update_dc_expected(input->msg);
+
+ /* record the node's feature set as a transient attribute */
+ update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET,
+ CRM_FEATURE_SET, NULL, FALSE);
+
+ /* send our status section to the DC */
+ tmp1 = controld_query_executor_state();
+ if (tmp1 != NULL) {
+ xmlNode *remotes = NULL;
+ xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1,
+ controld_globals.dc_name, CRM_SYSTEM_DC,
+ CRM_SYSTEM_CRMD, NULL);
+
+ crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
+
+ crm_debug("Confirming join-%d: sending local operation history to %s",
+ join_id, controld_globals.dc_name);
+
+ /*
+ * If this is the node's first join since the controller started on it,
+ * set its initial state (standby or member) according to the user's
+ * preference.
+ *
+         * We do not clear the LRM history here. Even if the DC failed to
+         * clear it when we last left, removing it here creates a race
+         * condition if the controller is being recovered.
+ * from the executor, we may end up with a blank status section. If we
+ * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
+ * clones and end up with multiple active instances on the machine.
+ */
+ if (first_join
+ && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+
+ first_join = FALSE;
+ if (start_state) {
+ set_join_state(start_state);
+ }
+ }
+
+ send_cluster_message(crm_get_peer(0, controld_globals.dc_name),
+ crm_msg_crmd, reply, TRUE);
+ free_xml(reply);
+
+ if (AM_I_DC == FALSE) {
+ register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE,
+ __func__);
+ }
+
+ free_xml(tmp1);
+
+ /* Update the remote node cache with information about which node
+ * is hosting the connection.
+ */
+ remotes = pcmk__xe_match(input->msg, XML_CIB_TAG_NODES, NULL, NULL);
+ if (remotes != NULL) {
+ pcmk__xe_foreach_child(remotes, XML_CIB_TAG_NODE, update_conn_host_cache, NULL);
+ }
+
+ } else {
+ crm_err("Could not confirm join-%d with %s: Local operation history "
+ "failed", join_id, controld_globals.dc_name);
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+}
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
new file mode 100644
index 0000000..f82b132
--- /dev/null
+++ b/daemons/controld/controld_join_dc.c
@@ -0,0 +1,987 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+static char *max_generation_from = NULL;
+static xmlNodePtr max_generation_xml = NULL;
+
+/*!
+ * \internal
+ * \brief Nodes from which a CIB sync has failed since the peer joined
+ *
+ * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
+ * the name of a client node from which a CIB \p sync_from() call has failed in
+ * \p do_dc_join_finalize() since the client joined the cluster as a peer.
+ * \p join_id is the ID of the join round in which the \p sync_from() failed,
+ * and is intended for use in nack log messages.
+ */
+static GHashTable *failed_sync_nodes = NULL;
+
+void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
+void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
+gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+/* Numeric counter used to identify join rounds (an unsigned int would be
+ * appropriate, except we get and set it in XML as int)
+ */
+static int current_join_id = 0;
+
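+/* A reader's guide (a sketch inferred from the code below, not original to
+ * this file): on the DC, each peer advances through join phases roughly as
+ *
+ *     crm_join_none       -> reset at the start of a join round
+ *     crm_join_welcomed   -> join offer sent (join_make_offer)
+ *     crm_join_integrated -> join request accepted (do_dc_join_filter_offer)
+ *     crm_join_finalized  -> ack sent after CIB sync (finalize_join_for)
+ *     crm_join_confirmed  -> node history received (do_dc_join_ack)
+ *
+ * with crm_join_nack and crm_join_nack_quiet marking rejected requests.
+ */
+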
+/*!
+ * \internal
+ * \brief Destroy the hash table containing failed sync nodes
+ */
+void
+controld_destroy_failed_sync_table(void)
+{
+ if (failed_sync_nodes != NULL) {
+ g_hash_table_destroy(failed_sync_nodes);
+ failed_sync_nodes = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Remove a node from the failed sync nodes table if present
+ *
+ * \param[in] node_name Node name to remove
+ */
+void
+controld_remove_failed_sync_node(const char *node_name)
+{
+ if (failed_sync_nodes != NULL) {
+ g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add to a hash table a node whose CIB failed to sync
+ *
+ * \param[in] node_name Name of node whose CIB failed to sync
+ * \param[in] join_id Join round when the failure occurred
+ */
+static void
+record_failed_sync_node(const char *node_name, gint join_id)
+{
+ if (failed_sync_nodes == NULL) {
+ failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
+ }
+
+ /* If the node is already in the table then we failed to nack it during the
+ * filter offer step
+ */
+ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
+ GINT_TO_POINTER(join_id)));
+}
+
+/*!
+ * \internal
+ * \brief Look up a node name in the failed sync table
+ *
+ * \param[in] node_name Name of node to look up
+ * \param[out] join_id    Where to store the ID of the join round in which
+ *                        the sync failed
+ *
+ * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
+ * node name was found, or \p pcmk_rc_node_unknown otherwise.
+ * \note \p *join_id is set to -1 if the node is not found.
+ */
+static int
+lookup_failed_sync_node(const char *node_name, gint *join_id)
+{
+ *join_id = -1;
+
+ if (failed_sync_nodes != NULL) {
+ gpointer result = g_hash_table_lookup(failed_sync_nodes,
+ (gchar *) node_name);
+ if (result != NULL) {
+ *join_id = GPOINTER_TO_INT(result);
+ return pcmk_rc_ok;
+ }
+ }
+ return pcmk_rc_node_unknown;
+}
+
+void
+crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
+{
+ enum crm_join_phase last = 0;
+
+ CRM_CHECK(node != NULL, return);
+
+ /* Remote nodes do not participate in joins */
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ return;
+ }
+
+ last = node->join;
+
+ if(phase == last) {
+ crm_trace("Node %s join-%d phase is still %s "
+ CRM_XS " nodeid=%u source=%s",
+ node->uname, current_join_id, crm_join_phase_str(last),
+ node->id, source);
+
+ } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
+ node->join = phase;
+ crm_trace("Node %s join-%d phase is now %s (was %s) "
+ CRM_XS " nodeid=%u source=%s",
+ node->uname, current_join_id, crm_join_phase_str(phase),
+ crm_join_phase_str(last), node->id, source);
+
+ } else {
+ crm_warn("Rejecting join-%d phase update for node %s because "
+ "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
+ current_join_id, node->uname, crm_join_phase_str(last),
+ crm_join_phase_str(phase), node->id, source);
+ }
+}
+
+static void
+start_join_round(void)
+{
+ GHashTableIter iter;
+ crm_node_t *peer = NULL;
+
+ crm_debug("Starting new join round join-%d", current_join_id);
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ crm_update_peer_join(__func__, peer, crm_join_none);
+ }
+ if (max_generation_from != NULL) {
+ free(max_generation_from);
+ max_generation_from = NULL;
+ }
+ if (max_generation_xml != NULL) {
+ free_xml(max_generation_xml);
+ max_generation_xml = NULL;
+ }
+ controld_clear_fsa_input_flags(R_HAVE_CIB);
+ controld_forget_all_cib_replace_calls();
+}
+
+/*!
+ * \internal
+ * \brief Create a join message from the DC
+ *
+ * \param[in] join_op Join operation name
+ * \param[in] host_to Recipient of message
+ */
+static xmlNode *
+create_dc_message(const char *join_op, const char *host_to)
+{
+ xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
+ CRM_SYSTEM_DC, NULL);
+
+ /* Identify which election this is a part of */
+ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
+
+ /* Add a field specifying whether the DC is shutting down. This keeps the
+ * joining node from fencing the old DC if it becomes the new DC.
+ */
+ pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING,
+ pcmk_is_set(controld_globals.fsa_input_register,
+ R_SHUTDOWN));
+ return msg;
+}
+
+static void
+join_make_offer(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *offer = NULL;
+ crm_node_t *member = (crm_node_t *)value;
+
+ CRM_ASSERT(member != NULL);
+ if (crm_is_peer_active(member) == FALSE) {
+ crm_info("Not making join-%d offer to inactive node %s",
+ current_join_id,
+ (member->uname? member->uname : "with unknown name"));
+ if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
+ /* You would think this unsafe, but in fact this plus an
+ * active resource is what causes it to be fenced.
+ *
+             * Yes, this does mean that any node that dies at the same
+             * time as the old DC and is not (still) running a resource
+             * won't be fenced.
+ *
+ * I'm not happy about this either.
+ */
+ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
+ }
+ return;
+ }
+
+ if (member->uname == NULL) {
+ crm_info("Not making join-%d offer to node uuid %s with unknown name",
+ current_join_id, member->uuid);
+ return;
+ }
+
+ if (controld_globals.membership_id != crm_peer_seq) {
+ controld_globals.membership_id = crm_peer_seq;
+ crm_info("Making join-%d offers based on membership event %llu",
+ current_join_id, crm_peer_seq);
+ }
+
+ if(user_data && member->join > crm_join_none) {
+ crm_info("Not making join-%d offer to already known node %s (%s)",
+ current_join_id, member->uname,
+ crm_join_phase_str(member->join));
+ return;
+ }
+
+ crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
+
+ offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
+
+ // Advertise our feature set so the joining node can bail if not compatible
+ crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+
+ crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
+ send_cluster_message(member, crm_msg_crmd, offer, TRUE);
+ free_xml(offer);
+
+ crm_update_peer_join(__func__, member, crm_join_welcomed);
+}
+
+/* A_DC_JOIN_OFFER_ALL */
+void
+do_dc_join_offer_all(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ int count;
+
+ /* Reset everyone's status back to down or in_ccm in the CIB.
+ * Any nodes that are active in the CIB but not in the cluster membership
+ * will be seen as offline by the scheduler anyway.
+ */
+ current_join_id++;
+ start_join_round();
+
+ update_dc(NULL);
+ if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
+ crm_info("A new node joined the cluster");
+ }
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
+
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
+ current_join_id, count, pcmk__plural_s(count));
+
+ // Don't waste time by invoking the scheduler yet
+}
+
+/* A_DC_JOIN_OFFER_ONE */
+void
+do_dc_join_offer_one(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_node_t *member;
+ ha_msg_input_t *welcome = NULL;
+ int count;
+ const char *join_to = NULL;
+
+ if (msg_data->data == NULL) {
+ crm_info("Making join-%d offers to any unconfirmed nodes "
+ "because an unknown node joined", current_join_id);
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
+ check_join_state(cur_state, __func__);
+ return;
+ }
+
+ welcome = fsa_typed_data(fsa_dt_ha_msg);
+ if (welcome == NULL) {
+ // fsa_typed_data() already logged an error
+ return;
+ }
+
+ join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
+ if (join_to == NULL) {
+ crm_err("Can't make join-%d offer to unknown node", current_join_id);
+ return;
+ }
+ member = crm_get_peer(0, join_to);
+
+ /* It is possible that a node will have been sick or starting up when the
+ * original offer was made. However, it will either re-announce itself in
+ * due course, or we can re-store the original offer on the client.
+ */
+
+ crm_update_peer_join(__func__, member, crm_join_none);
+ join_make_offer(NULL, member, NULL);
+
+ /* If the offer isn't to the local node, make an offer to the local node as
+ * well, to ensure the correct value for max_generation_from.
+ */
+ if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
+ member = crm_get_peer(0, controld_globals.our_nodename);
+ join_make_offer(NULL, member, NULL);
+ }
+
+ /* This was a genuine join request; cancel any existing transition and
+ * invoke the scheduler.
+ */
+ abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
+
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
+ current_join_id, count, pcmk__plural_s(count));
+
+ // Don't waste time by invoking the scheduler yet
+}
+
+static int
+compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
+{
+ const char *elem_l = crm_element_value(left, field);
+ const char *elem_r = crm_element_value(right, field);
+
+ long long int_elem_l;
+ long long int_elem_r;
+
+ pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
+ pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
+
+ if (int_elem_l < int_elem_r) {
+ return -1;
+
+ } else if (int_elem_l > int_elem_r) {
+ return 1;
+ }
+
+ return 0;
+}
+
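+/* A worked example (a sketch, not original to this file): the caller below
+ * compares CIB generations field by field, in order of significance:
+ * admin_epoch, then epoch, then num_updates. So
+ *
+ *     <cib admin_epoch="1" epoch="0"   num_updates="0"/>
+ *
+ * outranks
+ *
+ *     <cib admin_epoch="0" epoch="100" num_updates="9000"/>
+ *
+ * because the comparison stops at the first field that differs.
+ */
+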
+/* A_DC_JOIN_PROCESS_REQ */
+void
+do_dc_join_filter_offer(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ xmlNode *generation = NULL;
+
+ int cmp = 0;
+ int join_id = -1;
+ int count = 0;
+ gint value = 0;
+ gboolean ack_nack_bool = TRUE;
+ ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+
+ const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+ const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
+ const char *join_version = crm_element_value(join_ack->msg,
+ XML_ATTR_CRM_VERSION);
+ crm_node_t *join_node = NULL;
+
+ if (join_from == NULL) {
+ crm_err("Ignoring invalid join request without node name");
+ return;
+ }
+ join_node = crm_get_peer(0, join_from);
+
+ crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
+ if (join_id != current_join_id) {
+ crm_debug("Ignoring join-%d request from %s because we are on join-%d",
+ join_id, join_from, current_join_id);
+ check_join_state(cur_state, __func__);
+ return;
+ }
+
+ generation = join_ack->xml;
+ if (max_generation_xml != NULL && generation != NULL) {
+ int lpc = 0;
+
+ const char *attributes[] = {
+ XML_ATTR_GENERATION_ADMIN,
+ XML_ATTR_GENERATION,
+ XML_ATTR_NUMUPDATES,
+ };
+
+ for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
+ cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
+ }
+ }
+
+ if (ref == NULL) {
+ ref = "none"; // for logging only
+ }
+
+ if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
+ crm_err("Rejecting join-%d request from node %s because we failed to "
+ "sync its CIB in join-%d " CRM_XS " ref=%s",
+ join_id, join_from, value, ref);
+ ack_nack_bool = FALSE;
+
+ } else if (!crm_is_peer_active(join_node)) {
+ if (match_down_event(join_from) != NULL) {
+ /* The join request was received after the node was fenced or
+ * otherwise shutdown in a way that we're aware of. No need to log
+ * an error in this rare occurrence; we know the client was recently
+ * shut down, and receiving a lingering in-flight request is not
+ * cause for alarm.
+ */
+ crm_debug("Rejecting join-%d request from inactive node %s "
+ CRM_XS " ref=%s", join_id, join_from, ref);
+ } else {
+ crm_err("Rejecting join-%d request from inactive node %s "
+ CRM_XS " ref=%s", join_id, join_from, ref);
+ }
+ ack_nack_bool = FALSE;
+
+ } else if (generation == NULL) {
+ crm_err("Rejecting invalid join-%d request from node %s "
+ "missing CIB generation " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ ack_nack_bool = FALSE;
+
+ } else if ((join_version == NULL)
+ || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
+ crm_err("Rejecting join-%d request from node %s because feature set %s"
+ " is incompatible with ours (%s) " CRM_XS " ref=%s",
+ join_id, join_from, (join_version? join_version : "pre-3.1.0"),
+ CRM_FEATURE_SET, ref);
+ ack_nack_bool = FALSE;
+
+ } else if (max_generation_xml == NULL) {
+ const char *validation = crm_element_value(generation,
+ XML_ATTR_VALIDATION);
+
+ if (get_schema_version(validation) < 0) {
+ crm_err("Rejecting join-%d request from %s (with first CIB "
+ "generation) due to unknown schema version %s "
+ CRM_XS " ref=%s",
+ join_id, join_from, validation, ref);
+ ack_nack_bool = FALSE;
+
+ } else {
+ crm_debug("Accepting join-%d request from %s (with first CIB "
+ "generation) " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ max_generation_xml = copy_xml(generation);
+ pcmk__str_update(&max_generation_from, join_from);
+ }
+
+ } else if ((cmp < 0)
+ || ((cmp == 0)
+ && pcmk__str_eq(join_from, controld_globals.our_nodename,
+ pcmk__str_casei))) {
+ const char *validation = crm_element_value(generation,
+ XML_ATTR_VALIDATION);
+
+ if (get_schema_version(validation) < 0) {
+ crm_err("Rejecting join-%d request from %s (with better CIB "
+ "generation than current best from %s) due to unknown "
+ "schema version %s " CRM_XS " ref=%s",
+ join_id, join_from, max_generation_from, validation, ref);
+ ack_nack_bool = FALSE;
+
+ } else {
+ crm_debug("Accepting join-%d request from %s (with better CIB "
+ "generation than current best from %s) " CRM_XS " ref=%s",
+ join_id, join_from, max_generation_from, ref);
+ crm_log_xml_debug(max_generation_xml, "Old max generation");
+ crm_log_xml_debug(generation, "New max generation");
+
+ free_xml(max_generation_xml);
+ max_generation_xml = copy_xml(join_ack->xml);
+ pcmk__str_update(&max_generation_from, join_from);
+ }
+
+ } else {
+ crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
+ join_id, join_from, ref);
+ }
+
+ if (!ack_nack_bool) {
+ if (compare_version(join_version, "3.17.0") < 0) {
+ /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
+ * after a nack message, don't send one
+ */
+ crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
+ } else {
+ crm_update_peer_join(__func__, join_node, crm_join_nack);
+ }
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
+
+ } else {
+ crm_update_peer_join(__func__, join_node, crm_join_integrated);
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+ }
+
+ count = crmd_join_phase_count(crm_join_integrated);
+ crm_debug("%d node%s currently integrated in join-%d",
+ count, pcmk__plural_s(count), join_id);
+
+ if (check_join_state(cur_state, __func__) == FALSE) {
+ // Don't waste time by invoking the scheduler yet
+ count = crmd_join_phase_count(crm_join_welcomed);
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s",
+ join_id, count, pcmk__plural_s(count));
+ }
+}
+
+/* A_DC_JOIN_FINALIZE */
+void
+do_dc_join_finalize(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ char *sync_from = NULL;
+ int rc = pcmk_ok;
+ int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
+ int count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ + crmd_join_phase_count(crm_join_nack)
+ + crmd_join_phase_count(crm_join_nack_quiet);
+
+ /* This we can do straight away and avoid clients timing us out
+ * while we compute the latest CIB
+ */
+ if (count_welcomed != 0) {
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s "
+ "before finalizing join", current_join_id, count_welcomed,
+ pcmk__plural_s(count_welcomed));
+ crmd_join_phase_log(LOG_DEBUG);
+ /* crmd_fsa_stall(FALSE); Needed? */
+ return;
+
+ } else if (count_finalizable == 0) {
+ crm_debug("Finalization not needed for join-%d at the current time",
+ current_join_id);
+ crmd_join_phase_log(LOG_DEBUG);
+ check_join_state(controld_globals.fsa_state, __func__);
+ return;
+ }
+
+ controld_clear_fsa_input_flags(R_HAVE_CIB);
+ if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ controld_set_fsa_input_flags(R_HAVE_CIB);
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ crm_warn("Delaying join-%d finalization while transition in progress",
+ current_join_id);
+ crmd_join_phase_log(LOG_DEBUG);
+ crmd_fsa_stall(FALSE);
+ return;
+ }
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ // Send our CIB out to everyone
+ pcmk__str_update(&sync_from, controld_globals.our_nodename);
+ crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
+ current_join_id, count_finalizable,
+ pcmk__plural_s(count_finalizable));
+ crm_log_xml_debug(max_generation_xml, "Requested CIB version");
+
+ } else {
+ // Ask for the agreed best CIB
+ pcmk__str_update(&sync_from, max_generation_from);
+ crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
+ current_join_id, count_finalizable,
+ pcmk__plural_s(count_finalizable), sync_from);
+ crm_log_xml_notice(max_generation_xml, "Requested CIB version");
+ }
+ crmd_join_phase_log(LOG_DEBUG);
+
+ rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
+ sync_from, NULL, cib_none);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ controld_record_cib_replace_call(rc);
+ }
+ fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
+}
+
+void
+free_max_generation(void)
+{
+ free(max_generation_from);
+ max_generation_from = NULL;
+
+ free_xml(max_generation_xml);
+ max_generation_xml = NULL;
+}
+
+void
+finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ CRM_LOG_ASSERT(-EPERM != rc);
+
+ controld_forget_cib_replace_call(call_id);
+
+ if (rc != pcmk_ok) {
+ const char *sync_from = (const char *) user_data;
+
+ do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
+ "Could not sync CIB from %s in join-%d: %s",
+ sync_from, current_join_id, pcmk_strerror(rc));
+
+ if (rc != -pcmk_err_old_data) {
+ record_failed_sync_node(sync_from, current_join_id);
+ }
+
+ /* restart the whole join process */
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
+ __func__);
+
+ } else if (!AM_I_DC) {
+ crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
+
+ } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
+ crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
+ "(%s)", current_join_id,
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else {
+ controld_set_fsa_input_flags(R_HAVE_CIB);
+
+ /* make sure dc_uuid is re-set to us */
+ if (!check_join_state(controld_globals.fsa_state, __func__)) {
+ int count_finalizable = 0;
+
+ count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ + crmd_join_phase_count(crm_join_nack)
+ + crmd_join_phase_count(crm_join_nack_quiet);
+
+ crm_debug("Notifying %d node%s of join-%d results",
+ count_finalizable, pcmk__plural_s(count_finalizable),
+ current_join_id);
+ g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
+ }
+ }
+}
+
+static void
+join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_debug("join-%d node history update (via CIB call %d) complete",
+ current_join_id, call_id);
+ check_join_state(controld_globals.fsa_state, __func__);
+
+ } else {
+ crm_err("join-%d node history update (via CIB call %d) failed: %s "
+ "(next transition may determine resource status incorrectly)",
+ current_join_id, call_id, pcmk_strerror(rc));
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/* A_DC_JOIN_PROCESS_ACK */
+void
+do_dc_join_ack(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ int join_id = -1;
+ ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+ enum controld_section_e section = controld_section_lrm;
+ const int cib_opts = cib_scope_local|cib_can_create;
+
+ const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
+ const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+ crm_node_t *peer = NULL;
+
+ // Sanity checks
+ if (join_from == NULL) {
+ crm_warn("Ignoring message received without node identification");
+ return;
+ }
+ if (op == NULL) {
+ crm_warn("Ignoring message received from %s without task", join_from);
+ return;
+ }
+
+ if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
+ crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
+ op, join_from, CRM_OP_JOIN_CONFIRM);
+ return;
+ }
+
+ if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
+ crm_warn("Ignoring join confirmation from %s without valid join ID",
+ join_from);
+ return;
+ }
+
+ peer = crm_get_peer(0, join_from);
+ if (peer->join != crm_join_finalized) {
+ crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
+ "(currently %s not %s)",
+ join_id, join_from, crm_join_phase_str(peer->join),
+ crm_join_phase_str(crm_join_finalized));
+ return;
+ }
+
+ if (join_id != current_join_id) {
+ crm_err("Rejecting join-%d confirmation from %s "
+ "because currently on join-%d",
+ join_id, join_from, current_join_id);
+ crm_update_peer_join(__func__, peer, crm_join_nack);
+ return;
+ }
+
+ crm_update_peer_join(__func__, peer, crm_join_confirmed);
+
+ /* Update CIB with node's current executor state. A new transition will be
+ * triggered later, when the CIB notifies us of the change.
+ */
+ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ section = controld_section_lrm_unlocked;
+ }
+ controld_delete_node_state(join_from, section, cib_scope_local);
+ if (pcmk__str_eq(join_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ xmlNode *now_dc_lrmd_state = controld_query_executor_state();
+
+ if (now_dc_lrmd_state != NULL) {
+ crm_debug("Updating local node history for join-%d "
+ "from query result", join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts,
+ join_update_complete_callback);
+ free_xml(now_dc_lrmd_state);
+ } else {
+ crm_warn("Updating local node history from join-%d confirmation "
+ "because query failed", join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+ join_update_complete_callback);
+ }
+ } else {
+ crm_debug("Updating node history for %s from join-%d confirmation",
+ join_from, join_id);
+ controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+ join_update_complete_callback);
+ }
+}
+
+void
+finalize_join_for(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *acknak = NULL;
+ xmlNode *tmp1 = NULL;
+ crm_node_t *join_node = value;
+ const char *join_to = join_node->uname;
+ bool integrated = false;
+
+ switch (join_node->join) {
+ case crm_join_integrated:
+ integrated = true;
+ break;
+ case crm_join_nack:
+ case crm_join_nack_quiet:
+ break;
+ default:
+ crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
+ "for join-%d", join_to,
+ crm_join_phase_str(join_node->join), current_join_id);
+ return;
+ }
+
+ /* Update the <node> element with the node's name and UUID, in case they
+ * weren't known before
+ */
+ crm_trace("Updating node name and UUID in CIB for %s", join_to);
+ tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
+ set_uuid(tmp1, XML_ATTR_ID, join_node);
+ crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
+ fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
+ free_xml(tmp1);
+
+ if (join_node->join == crm_join_nack_quiet) {
+ crm_trace("Not sending nack message to node %s with feature set older "
+ "than 3.17.0", join_to);
+ return;
+ }
+
+ join_node = crm_get_peer(0, join_to);
+ if (!crm_is_peer_active(join_node)) {
+ /*
+ * NACK'ing nodes that the membership layer doesn't know about yet
+ * simply creates more churn
+ *
+ * Better to leave them waiting and let the join restart when
+ * the new membership event comes in
+ *
+ * All other NACKs (due to versions etc) should still be processed
+ */
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
+ return;
+ }
+
+ // Acknowledge or nack node's join request
+ crm_debug("%sing join-%d request from %s",
+ integrated? "Acknowledg" : "Nack", current_join_id, join_to);
+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
+ pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
+
+ if (integrated) {
+ // No change needed for a nacked node
+ crm_update_peer_join(__func__, join_node, crm_join_finalized);
+ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+
+ /* Iterate through the remote peer cache and add information on which
+ * node hosts each to the ACK message. This keeps new controllers in
+ * sync with what has already happened.
+ */
+ if (crm_remote_peer_cache_size() != 0) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES);
+
+ g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *remote = NULL;
+
+ if (!node->conn_host) {
+ continue;
+ }
+
+ remote = create_xml_node(remotes, XML_CIB_TAG_NODE);
+ pcmk__xe_set_props(remote,
+ XML_ATTR_ID, node->uname,
+ XML_CIB_TAG_STATE, node->state,
+ PCMK__XA_CONN_HOST, node->conn_host,
+ NULL);
+ }
+ }
+ }
+ send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE);
+ free_xml(acknak);
+ return;
+}
+
+gboolean
+check_join_state(enum crmd_fsa_state cur_state, const char *source)
+{
+ static unsigned long long highest_seq = 0;
+
+ if (controld_globals.membership_id != crm_peer_seq) {
+ crm_debug("join-%d: Membership changed from %llu to %llu "
+ CRM_XS " highest=%llu state=%s for=%s",
+ current_join_id, controld_globals.membership_id, crm_peer_seq,
+ highest_seq, fsa_state2string(cur_state), source);
+ if(highest_seq < crm_peer_seq) {
+ /* Don't spam the FSA with duplicates */
+ highest_seq = crm_peer_seq;
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+ }
+
+ } else if (cur_state == S_INTEGRATION) {
+ if (crmd_join_phase_count(crm_join_welcomed) == 0) {
+ int count = crmd_join_phase_count(crm_join_integrated);
+
+ crm_debug("join-%d: Integration of %d peer%s complete "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
+ return TRUE;
+ }
+
+ } else if (cur_state == S_FINALIZE_JOIN) {
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ crm_debug("join-%d: Delaying finalization until we have CIB "
+ CRM_XS " state=%s for=%s",
+ current_join_id, fsa_state2string(cur_state), source);
+ return TRUE;
+
+ } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
+ int count = crmd_join_phase_count(crm_join_welcomed);
+
+ crm_debug("join-%d: Still waiting on %d welcomed node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
+ int count = crmd_join_phase_count(crm_join_integrated);
+
+ crm_debug("join-%d: Still waiting on %d integrated node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
+ int count = crmd_join_phase_count(crm_join_finalized);
+
+ crm_debug("join-%d: Still waiting on %d finalized node%s "
+ CRM_XS " state=%s for=%s",
+ current_join_id, count, pcmk__plural_s(count),
+ fsa_state2string(cur_state), source);
+ crmd_join_phase_log(LOG_DEBUG);
+
+ } else {
+ crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
+ current_join_id, fsa_state2string(cur_state), source);
+ register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+void
+do_dc_join_final(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
+ crm_update_quorum(crm_have_quorum, TRUE);
+}
+
+int crmd_join_phase_count(enum crm_join_phase phase)
+{
+ int count = 0;
+ crm_node_t *peer;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ if(peer->join == phase) {
+ count++;
+ }
+ }
+ return count;
+}
+
+void crmd_join_phase_log(int level)
+{
+ crm_node_t *peer;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
+ do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
+ crm_join_phase_str(peer->join));
+ }
+}
diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h
new file mode 100644
index 0000000..25f3db3
--- /dev/null
+++ b/daemons/controld/controld_lrm.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef CONTROLD_LRM__H
+# define CONTROLD_LRM__H
+
+#include <controld_messages.h>
+
+extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
+void lrm_clear_last_failure(const char *rsc_id, const char *node_name,
+ const char *operation, guint interval_ms);
+void lrm_op_callback(lrmd_event_data_t * op);
+lrmd_t *crmd_local_lrmd_conn(void);
+
+typedef struct resource_history_s {
+ char *id;
+ uint32_t last_callid;
+ lrmd_rsc_info_t rsc;
+ lrmd_event_data_t *last;
+ lrmd_event_data_t *failed;
+ GList *recurring_op_list;
+
+ /* Resources must be stopped using the same
+ * parameters they were started with. This hashtable
+ * holds the parameters that should be used for the next stop
+ * cmd on this resource. */
+ GHashTable *stop_params;
+} rsc_history_t;
+
+void history_free(gpointer data);
+
+enum active_op_e {
+ active_op_remove = (1 << 0),
+ active_op_cancelled = (1 << 1),
+};
+
+// In-flight action (recurring or pending)
+typedef struct active_op_s {
+ guint interval_ms;
+ int call_id;
+ uint32_t flags; // bitmask of active_op_e
+ time_t start_time;
+ time_t lock_time;
+ char *rsc_id;
+ char *op_type;
+ char *op_key;
+ char *user_data;
+ GHashTable *params;
+} active_op_t;
+
+#define controld_set_active_op_flags(active_op, flags_to_set) do { \
+ (active_op)->flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Active operation", (active_op)->op_key, \
+ (active_op)->flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_clear_active_op_flags(active_op, flags_to_clear) do { \
+ (active_op)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Active operation", (active_op)->op_key, \
+ (active_op)->flags, (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
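+/* Illustrative usage (a sketch): mark an in-flight action so that its
+ * history entry is removed once the result arrives, e.g.
+ *
+ *     controld_set_active_op_flags(pending, active_op_remove);
+ *
+ * where "pending" is a stand-in for an active_op_t pointer.
+ */
+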
+typedef struct lrm_state_s {
+ const char *node_name;
+ void *conn; // Reserved for controld_execd_state.c usage
+ void *remote_ra_data; // Reserved for controld_remote_ra.c usage
+
+ GHashTable *resource_history;
+ GHashTable *active_ops; // Pending and recurring actions
+ GHashTable *deletion_ops;
+ GHashTable *rsc_info_cache;
+ GHashTable *metadata_cache; // key = class[:provider]:agent, value = ra_metadata_s
+
+ int num_lrm_register_fails;
+} lrm_state_t;
+
+struct pending_deletion_op_s {
+ char *rsc;
+ ha_msg_input_t *input;
+};
+
+/*!
+ * \brief Check whether this is the local IPC connection to the executor
+ */
+gboolean lrm_state_is_local(lrm_state_t *lrm_state);
+
+/*!
+ * \brief Clear all state information from a single state entry.
+ * \note It is sometimes useful to preserve the metadata cache when it won't
+ *       go stale.
+ * \note This does not close the executor connection
+ */
+void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata);
+GList *lrm_state_get_list(void);
+
+/*!
+ * \brief Initialize internal state tables
+ */
+gboolean lrm_state_init_local(void);
+
+/*!
+ * \brief Destroy all state entries and internal state tables
+ */
+void lrm_state_destroy_all(void);
+
+/*!
+ * \brief Destroy executor connection by node name
+ */
+void lrm_state_destroy(const char *node_name);
+
+/*!
+ * \brief Find lrm_state data by node name
+ */
+lrm_state_t *lrm_state_find(const char *node_name);
+
+/*!
+ * \brief Either find or create a new entry
+ */
+lrm_state_t *lrm_state_find_or_create(const char *node_name);
+
+/*!
+ * The functions below are wrappers for the executor API that the controller
+ * uses. These wrapper functions allow us to treat the controller's remote
+ * executor connection resources the same as regular resources. Internally,
+ * regular resources go to the executor, and remote connection resources are
+ * handled locally in the controller.
+ */
+void lrm_state_disconnect_only(lrm_state_t * lrm_state);
+void lrm_state_disconnect(lrm_state_t * lrm_state);
+int controld_connect_local_executor(lrm_state_t *lrm_state);
+int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
+ int port, int timeout);
+int lrm_state_is_connected(lrm_state_t * lrm_state);
+int lrm_state_poke_connection(lrm_state_t * lrm_state);
+
+int lrm_state_get_metadata(lrm_state_t * lrm_state,
+ const char *class,
+ const char *provider,
+ const char *agent, char **output, enum lrmd_call_options options);
+int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms);
+int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms,
+ GHashTable *parameters, int *call_id);
+lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options);
+int lrm_state_register_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id,
+ const char *class,
+ const char *provider, const char *agent, enum lrmd_call_options options);
+int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+ const char *rsc_id, enum lrmd_call_options options);
+
+// Functions used to manage remote executor connection resources
+void remote_lrm_op_callback(lrmd_event_data_t * op);
+gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
+lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
+int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms);
+int controld_execute_remote_agent(const lrm_state_t *lrm_state,
+ const char *rsc_id, const char *action,
+ const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, lrmd_key_value_t *params,
+ int *call_id);
+void remote_ra_cleanup(lrm_state_t * lrm_state);
+void remote_ra_fail(const char *node_name);
+void remote_ra_process_pseudo(xmlNode *xml);
+gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state);
+void remote_ra_process_maintenance_nodes(xmlNode *xml);
+gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state);
+
+void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
+ active_op_t *pending, const xmlNode *action_xml);
+void controld_ack_event_directly(const char *to_host, const char *to_sys,
+ const lrmd_rsc_info_t *rsc,
+ lrmd_event_data_t *op, const char *rsc_id);
+void controld_rc2event(lrmd_event_data_t *event, int rc);
+void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id);
+
+#endif
diff --git a/daemons/controld/controld_matrix.c b/daemons/controld/controld_matrix.c
new file mode 100644
index 0000000..a404f0a
--- /dev/null
+++ b/daemons/controld/controld_matrix.c
@@ -0,0 +1,1250 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdint.h> // uint64_t
+
+#include <pacemaker-controld.h>
+
+/*
+ * The state transition table. The rows are inputs, and
+ * the columns are states.
+ */
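+
+/* Illustrative lookup (a sketch, assuming the enum values index rows and
+ * columns directly, as the MAXINPUT x MAXSTATE dimensions suggest; the real
+ * dispatch lives in the controller's FSA code):
+ *
+ *     enum crmd_fsa_state next = fsa_next_states[I_DC_TIMEOUT][S_NOT_DC];
+ *     // == S_ELECTION, per the I_DC_TIMEOUT row below
+ */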
+static const enum crmd_fsa_state fsa_next_states[MAXINPUT][MAXSTATE] = {
+/* Got an I_NULL */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_CIB_OP */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_CIB_UPDATE */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_DC_TIMEOUT */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_ELECTION,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RECOVERY,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_ELECTION,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_ELECTION,
+ },
+
+/* Got an I_ELECTION */
+ {
+ /* S_IDLE ==> */ S_ELECTION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_ELECTION,
+ /* S_FINALIZE_JOIN ==> */ S_ELECTION,
+ /* S_NOT_DC ==> */ S_ELECTION,
+ /* S_POLICY_ENGINE ==> */ S_ELECTION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_ELECTION,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_ELECTION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PE_CALC */
+ {
+ /* S_IDLE ==> */ S_POLICY_ENGINE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_DC */
+ {
+ /* S_IDLE ==> */ S_RELEASE_DC,
+ /* S_ELECTION ==> */ S_RELEASE_DC,
+ /* S_INTEGRATION ==> */ S_RELEASE_DC,
+ /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RELEASE_DC,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ELECTION_DC */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_INTEGRATION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_INTEGRATION,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ERROR */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RECOVERY,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RECOVERY,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_TERMINATE,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_RECOVERY,
+ },
+
+/* Got an I_FAIL */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RELEASE_DC,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_STOPPING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_RELEASE_DC,
+ },
+
+/* Got an I_INTEGRATED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_FINALIZE_JOIN,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_FINALIZED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_RECOVERY,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_NODE_JOIN */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_NOT_DC */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_NOT_DC,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RECOVERED */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_PENDING,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_FAIL */
+ {
+ /* S_IDLE ==> */ S_STOPPING,
+ /* S_ELECTION ==> */ S_STOPPING,
+ /* S_INTEGRATION ==> */ S_STOPPING,
+ /* S_FINALIZE_JOIN ==> */ S_STOPPING,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_STOPPING,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_STOPPING,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_STOPPING,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RELEASE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_PENDING,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_RESTART */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_TE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_IDLE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_ROUTER */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_SHUTDOWN */
+ {
+ /* S_IDLE ==> */ S_POLICY_ENGINE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_HALT ==> */ S_ELECTION,
+ },
+
+/* Got an I_STOP */
+ {
+ /* S_IDLE ==> */ S_STOPPING,
+ /* S_ELECTION ==> */ S_STOPPING,
+ /* S_INTEGRATION ==> */ S_STOPPING,
+ /* S_FINALIZE_JOIN ==> */ S_STOPPING,
+ /* S_NOT_DC ==> */ S_STOPPING,
+ /* S_POLICY_ENGINE ==> */ S_STOPPING,
+ /* S_RECOVERY ==> */ S_STOPPING,
+ /* S_RELEASE_DC ==> */ S_STOPPING,
+ /* S_STARTING ==> */ S_STOPPING,
+ /* S_PENDING ==> */ S_STOPPING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_STOPPING,
+ /* S_HALT ==> */ S_STOPPING,
+ },
+
+/* Got an I_TERMINATE */
+ {
+ /* S_IDLE ==> */ S_TERMINATE,
+ /* S_ELECTION ==> */ S_TERMINATE,
+ /* S_INTEGRATION ==> */ S_TERMINATE,
+ /* S_FINALIZE_JOIN ==> */ S_TERMINATE,
+ /* S_NOT_DC ==> */ S_TERMINATE,
+ /* S_POLICY_ENGINE ==> */ S_TERMINATE,
+ /* S_RECOVERY ==> */ S_TERMINATE,
+ /* S_RELEASE_DC ==> */ S_TERMINATE,
+ /* S_STARTING ==> */ S_TERMINATE,
+ /* S_PENDING ==> */ S_TERMINATE,
+ /* S_STOPPING ==> */ S_TERMINATE,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TERMINATE,
+ /* S_HALT ==> */ S_TERMINATE,
+ },
+
+/* Got an I_STARTUP */
+ {
+ /* S_IDLE ==> */ S_RECOVERY,
+ /* S_ELECTION ==> */ S_RECOVERY,
+ /* S_INTEGRATION ==> */ S_RECOVERY,
+ /* S_FINALIZE_JOIN ==> */ S_RECOVERY,
+ /* S_NOT_DC ==> */ S_RECOVERY,
+ /* S_POLICY_ENGINE ==> */ S_RECOVERY,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_RECOVERY,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PE_SUCCESS */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_OFFER */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_REQUEST */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_INTEGRATION,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_JOIN_RESULT */
+ {
+ /* S_IDLE ==> */ S_INTEGRATION,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_INTEGRATION,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_RECOVERY,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_WAIT_FOR_EVENT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_DC_HEARTBEAT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_LRM_EVENT */
+ {
+ /* S_IDLE ==> */ S_IDLE,
+ /* S_ELECTION ==> */ S_ELECTION,
+ /* S_INTEGRATION ==> */ S_INTEGRATION,
+ /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN,
+ /* S_NOT_DC ==> */ S_NOT_DC,
+ /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_PENDING */
+ {
+ /* S_IDLE ==> */ S_PENDING,
+ /* S_ELECTION ==> */ S_PENDING,
+ /* S_INTEGRATION ==> */ S_PENDING,
+ /* S_FINALIZE_JOIN ==> */ S_PENDING,
+ /* S_NOT_DC ==> */ S_PENDING,
+ /* S_POLICY_ENGINE ==> */ S_PENDING,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_PENDING,
+ /* S_PENDING ==> */ S_PENDING,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_PENDING,
+ /* S_HALT ==> */ S_HALT,
+ },
+
+/* Got an I_HALT */
+ {
+ /* S_IDLE ==> */ S_HALT,
+ /* S_ELECTION ==> */ S_HALT,
+ /* S_INTEGRATION ==> */ S_HALT,
+ /* S_FINALIZE_JOIN ==> */ S_HALT,
+ /* S_NOT_DC ==> */ S_HALT,
+ /* S_POLICY_ENGINE ==> */ S_HALT,
+ /* S_RECOVERY ==> */ S_RECOVERY,
+ /* S_RELEASE_DC ==> */ S_RELEASE_DC,
+ /* S_STARTING ==> */ S_STARTING,
+ /* S_PENDING ==> */ S_HALT,
+ /* S_STOPPING ==> */ S_STOPPING,
+ /* S_TERMINATE ==> */ S_TERMINATE,
+ /* S_TRANSITION_ENGINE ==> */ S_HALT,
+ /* S_HALT ==> */ S_HALT,
+ },
+};
+
+/*
+ * The action table. Each entry is a set of actions to take, OR'ed
+ * together. As in the state table, the rows are inputs and the
+ * columns are states.
+ */
+
+/* NOTE: In the FSA, the actions are extracted first, and then the state is updated. */
+
+static const uint64_t fsa_actions[MAXINPUT][MAXSTATE] = {
+
+/* Got an I_NULL */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_NOTHING,
+ /* S_INTEGRATION ==> */ A_NOTHING,
+ /* S_FINALIZE_JOIN ==> */ A_NOTHING,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_NOTHING,
+ /* S_PENDING ==> */ A_NOTHING,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_NOTHING,
+ },
+
+/* Got an I_CIB_OP */
+ {
+ /* S_IDLE ==> */ A_ERROR,
+ /* S_ELECTION ==> */ A_ERROR,
+ /* S_INTEGRATION ==> */ A_ERROR,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR,
+ /* S_NOT_DC ==> */ A_ERROR,
+ /* S_POLICY_ENGINE ==> */ A_ERROR,
+ /* S_RECOVERY ==> */ A_ERROR,
+ /* S_RELEASE_DC ==> */ A_ERROR,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_ERROR,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR,
+ /* S_HALT ==> */ A_ERROR,
+ },
+
+/* Got an I_CIB_UPDATE */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_LOG,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_DC_TIMEOUT */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_ELECTION_VOTE | A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_ELECTION_VOTE | A_WARN,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL | A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ELECTION */
+ {
+ /* S_IDLE ==> */ A_ELECTION_VOTE,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_ELECTION_VOTE,
+ /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE,
+ /* S_NOT_DC ==> */ A_ELECTION_VOTE,
+ /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_ELECTION_VOTE,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE,
+ /* S_HALT ==> */ A_ELECTION_VOTE,
+ },
+
+/* Got an I_PE_CALC */
+ {
+ /* S_IDLE ==> */ A_PE_INVOKE,
+ /* S_ELECTION ==> */ A_NOTHING,
+ /* S_INTEGRATION ==> */ A_NOTHING,
+ /* S_FINALIZE_JOIN ==> */ A_NOTHING,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_PE_INVOKE,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE,
+ /* S_HALT ==> */ A_ERROR,
+ },
+
+/* Got an I_RELEASE_DC */
+ {
+ /* S_IDLE ==> */ O_RELEASE,
+ /* S_ELECTION ==> */ O_RELEASE,
+ /* S_INTEGRATION ==> */ O_RELEASE | A_WARN,
+ /* S_FINALIZE_JOIN ==> */ O_RELEASE | A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ O_RELEASE | A_WARN,
+ /* S_RECOVERY ==> */ O_RELEASE,
+ /* S_RELEASE_DC ==> */ O_RELEASE | A_WARN,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ O_RELEASE | A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ELECTION_DC */
+ {
+ /* S_IDLE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_ELECTION ==> */
+ A_LOG | A_DC_TAKEOVER | A_PE_START | A_TE_START | A_DC_JOIN_OFFER_ALL | A_DC_TIMER_STOP,
+ /* S_INTEGRATION ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL,
+ /* S_NOT_DC ==> */ A_LOG | A_ELECTION_VOTE,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_STARTING ==> */ A_LOG | A_WARN,
+ /* S_PENDING ==> */ A_LOG | A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ERROR */
+ {
+ /* S_IDLE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_ELECTION ==> */ A_ERROR | A_RECOVER | O_RELEASE,
+ /* S_INTEGRATION ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_NOT_DC ==> */ A_ERROR | A_RECOVER,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_ERROR | A_RECOVER,
+ /* S_STARTING ==> */ A_ERROR | A_RECOVER,
+ /* S_PENDING ==> */ A_ERROR | A_RECOVER,
+ /* S_STOPPING ==> */ A_ERROR | A_EXIT_1,
+ /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ /* S_HALT ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START,
+ },
+
+/* Got an I_FAIL */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_WARN | A_DC_JOIN_OFFER_ALL,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_DC_JOIN_OFFER_ALL | A_TE_CANCEL,
+ /* S_RECOVERY ==> */ A_WARN | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | O_LRM_RECONNECT | A_PE_INVOKE | A_TE_CANCEL,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_INTEGRATED */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_FINALIZED */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL | A_TE_CANCEL,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_NOTHING,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_NODE_JOIN */
+ {
+ /* S_IDLE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_NOT_DC */
+ {
+ /* S_IDLE ==> */ A_WARN | O_RELEASE,
+ /* S_ELECTION ==> */ A_ERROR | A_ELECTION_START | A_DC_TIMER_STOP,
+ /* S_INTEGRATION ==> */ A_ERROR | O_RELEASE,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE,
+ /* S_NOT_DC ==> */ A_LOG,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | O_RELEASE,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_LOG | A_DC_TIMER_STOP,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_RELEASE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RECOVERED */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RELEASE_FAIL */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_NOTHING,
+ /* S_RECOVERY ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_RELEASE_DC ==> */ A_NOTHING,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RELEASE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_RESTART */
+ {
+ /* S_IDLE ==> */ A_NOTHING,
+ /* S_ELECTION ==> */ A_LOG | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_LOG | A_DC_JOIN_OFFER_ALL,
+ /* S_FINALIZE_JOIN ==> */ A_LOG | A_DC_JOIN_FINALIZE,
+ /* S_NOT_DC ==> */ A_LOG | A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_LOG | A_PE_INVOKE,
+ /* S_RECOVERY ==> */ A_LOG | A_RECOVER | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_LOG | O_RELEASE,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG | A_TE_INVOKE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_TE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_ERROR,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_RECOVER | A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_ERROR,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_ROUTER */
+ {
+ /* S_IDLE ==> */ A_MSG_ROUTE,
+ /* S_ELECTION ==> */ A_MSG_ROUTE,
+ /* S_INTEGRATION ==> */ A_MSG_ROUTE,
+ /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE,
+ /* S_NOT_DC ==> */ A_MSG_ROUTE,
+ /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE,
+ /* S_RECOVERY ==> */ A_MSG_ROUTE,
+ /* S_RELEASE_DC ==> */ A_MSG_ROUTE,
+ /* S_STARTING ==> */ A_MSG_ROUTE,
+ /* S_PENDING ==> */ A_MSG_ROUTE,
+ /* S_STOPPING ==> */ A_MSG_ROUTE,
+ /* S_TERMINATE ==> */ A_MSG_ROUTE,
+ /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE,
+ /* S_HALT ==> */ A_WARN | A_MSG_ROUTE,
+ },
+
+/* Got an I_SHUTDOWN */
+ {
+ /* S_IDLE ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_ELECTION ==> */ A_LOG | A_SHUTDOWN_REQ | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_FINALIZE_JOIN ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_NOT_DC ==> */ A_SHUTDOWN_REQ,
+ /* S_POLICY_ENGINE ==> */ A_LOG | A_SHUTDOWN_REQ,
+ /* S_RECOVERY ==> */ A_WARN | O_EXIT | O_RELEASE,
+ /* S_RELEASE_DC ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_STARTING ==> */ A_WARN | O_EXIT,
+ /* S_PENDING ==> */ A_SHUTDOWN_REQ,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_SHUTDOWN_REQ,
+ /* S_HALT ==> */ A_WARN | A_ELECTION_START | A_SHUTDOWN_REQ,
+ },
+
+/* Got an I_STOP */
+ {
+ /* S_IDLE ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_ELECTION ==> */ O_RELEASE | O_EXIT,
+ /* S_INTEGRATION ==> */ A_WARN | O_RELEASE | O_EXIT,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_NOT_DC ==> */ O_EXIT,
+ /* S_POLICY_ENGINE ==> */ A_WARN | O_RELEASE | O_EXIT,
+ /* S_RECOVERY ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE | O_EXIT,
+ /* S_STARTING ==> */ O_EXIT,
+ /* S_PENDING ==> */ O_EXIT,
+ /* S_STOPPING ==> */ O_EXIT,
+ /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG | O_RELEASE | O_EXIT,
+ /* S_HALT ==> */ O_RELEASE | O_EXIT | A_WARN,
+ },
+
+/* Got an I_TERMINATE */
+ {
+ /* S_IDLE ==> */ A_ERROR | O_EXIT,
+ /* S_ELECTION ==> */ A_ERROR | O_EXIT,
+ /* S_INTEGRATION ==> */ A_ERROR | O_EXIT,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR | O_EXIT,
+ /* S_NOT_DC ==> */ A_ERROR | O_EXIT,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | O_EXIT,
+ /* S_RECOVERY ==> */ A_ERROR | O_EXIT,
+ /* S_RELEASE_DC ==> */ A_ERROR | O_EXIT,
+ /* S_STARTING ==> */ O_EXIT,
+ /* S_PENDING ==> */ A_ERROR | O_EXIT,
+ /* S_STOPPING ==> */ O_EXIT,
+ /* S_TERMINATE ==> */ O_EXIT,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_EXIT,
+ /* S_HALT ==> */ A_ERROR | O_EXIT,
+ },
+
+/* Got an I_STARTUP */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */
+ A_LOG | A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_PE_SUCCESS */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_TE_INVOKE,
+ /* S_RECOVERY ==> */ A_RECOVER | A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_OFFER */
+ {
+ /* S_IDLE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST,
+ /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST,
+ /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_POLICY_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_RECOVERY ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_RELEASE_DC ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_REQUEST */
+ {
+ /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ,
+ /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_JOIN_RESULT */
+ {
+ /* S_IDLE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_LOG | A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK,
+ /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK,
+ /* S_NOT_DC ==> */ A_ERROR | A_CL_JOIN_ANNOUNCE,
+ /* S_POLICY_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_ERROR,
+ /* S_PENDING ==> */ A_CL_JOIN_RESULT,
+ /* S_STOPPING ==> */ A_ERROR,
+ /* S_TERMINATE ==> */ A_ERROR,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_WAIT_FOR_EVENT */
+ {
+ /* S_IDLE ==> */ A_LOG,
+ /* S_ELECTION ==> */ A_LOG,
+ /* S_INTEGRATION ==> */ A_LOG,
+ /* S_FINALIZE_JOIN ==> */ A_LOG,
+ /* S_NOT_DC ==> */ A_LOG,
+ /* S_POLICY_ENGINE ==> */ A_LOG,
+ /* S_RECOVERY ==> */ A_LOG,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG,
+ /* S_STOPPING ==> */ A_LOG,
+ /* S_TERMINATE ==> */ A_LOG,
+ /* S_TRANSITION_ENGINE ==> */ A_LOG,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_DC_HEARTBEAT */
+ {
+ /* S_IDLE ==> */ A_ERROR,
+ /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE,
+ /* S_INTEGRATION ==> */ A_ERROR,
+ /* S_FINALIZE_JOIN ==> */ A_ERROR,
+ /* S_NOT_DC ==> */ A_NOTHING,
+ /* S_POLICY_ENGINE ==> */ A_ERROR,
+ /* S_RECOVERY ==> */ A_NOTHING,
+ /* S_RELEASE_DC ==> */ A_LOG,
+ /* S_STARTING ==> */ A_LOG,
+ /* S_PENDING ==> */ A_LOG | A_CL_JOIN_ANNOUNCE,
+ /* S_STOPPING ==> */ A_NOTHING,
+ /* S_TERMINATE ==> */ A_NOTHING,
+ /* S_TRANSITION_ENGINE ==> */ A_ERROR,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_LRM_EVENT */
+ {
+ /* S_IDLE ==> */ A_LRM_EVENT,
+ /* S_ELECTION ==> */ A_LRM_EVENT,
+ /* S_INTEGRATION ==> */ A_LRM_EVENT,
+ /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT,
+ /* S_NOT_DC ==> */ A_LRM_EVENT,
+ /* S_POLICY_ENGINE ==> */ A_LRM_EVENT,
+ /* S_RECOVERY ==> */ A_LRM_EVENT,
+ /* S_RELEASE_DC ==> */ A_LRM_EVENT,
+ /* S_STARTING ==> */ A_LRM_EVENT,
+ /* S_PENDING ==> */ A_LRM_EVENT,
+ /* S_STOPPING ==> */ A_LRM_EVENT,
+ /* S_TERMINATE ==> */ A_LRM_EVENT,
+ /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */
+/* Got an I_PENDING */
+ {
+ /* S_IDLE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_ELECTION ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_INTEGRATION ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_FINALIZE_JOIN ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_NOT_DC ==> */ A_LOG | O_DC_TIMER_RESTART,
+ /* S_POLICY_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN | O_DC_TIMER_RESTART,
+ /* S_STARTING ==> */ A_LOG | A_DC_TIMER_START | A_CL_JOIN_QUERY,
+ /* S_PENDING ==> */ A_LOG | O_DC_TIMER_RESTART,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART,
+ /* S_HALT ==> */ A_WARN,
+ },
+
+/* Got an I_HALT */
+ {
+ /* S_IDLE ==> */ A_WARN,
+ /* S_ELECTION ==> */ A_WARN,
+ /* S_INTEGRATION ==> */ A_WARN,
+ /* S_FINALIZE_JOIN ==> */ A_WARN,
+ /* S_NOT_DC ==> */ A_WARN,
+ /* S_POLICY_ENGINE ==> */ A_WARN,
+ /* S_RECOVERY ==> */ A_WARN,
+ /* S_RELEASE_DC ==> */ A_WARN,
+ /* S_STARTING ==> */ A_WARN,
+ /* S_PENDING ==> */ A_WARN,
+ /* S_STOPPING ==> */ A_WARN,
+ /* S_TERMINATE ==> */ A_WARN,
+ /* S_TRANSITION_ENGINE ==> */ A_WARN,
+ /* S_HALT ==> */ A_WARN,
+ },
+};
+
+/*!
+ * \internal
+ * \brief Get the next FSA state given an input and the current state
+ *
+ * \param[in] input FSA input
+ *
+ * \return The next FSA state
+ */
+enum crmd_fsa_state
+controld_fsa_get_next_state(enum crmd_fsa_input input)
+{
+ return fsa_next_states[input][controld_globals.fsa_state];
+}
+
+/*!
+ * \internal
+ * \brief Get the appropriate FSA action given an input and the current state
+ *
+ * \param[in] input FSA input
+ *
+ * \return The appropriate FSA action
+ */
+uint64_t
+controld_fsa_get_action(enum crmd_fsa_input input)
+{
+ return fsa_actions[input][controld_globals.fsa_state];
+}
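+
+/* As the NOTE above the action table says, callers are expected to look up
+ * the actions before updating the state, since both tables are indexed by
+ * the current state. A rough sketch only (the real FSA engine adds queueing,
+ * logging, and error handling around these lookups):
+ *
+ *     uint64_t actions = controld_fsa_get_action(input);
+ *
+ *     // ... initiate 'actions' while the current state is still in effect ...
+ *     controld_globals.fsa_state = controld_fsa_get_next_state(input);
+ */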
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
new file mode 100644
index 0000000..1f7e4c0
--- /dev/null
+++ b/daemons/controld/controld_membership.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+/* put these first so that uuid_t is defined without conflicts */
+#include <crm_internal.h>
+
+#include <string.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <pacemaker-controld.h>
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+static void
+reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
+{
+ crm_node_t *node = value;
+
+ if (crm_is_peer_active(node) == FALSE) {
+ crm_update_peer_join(__func__, node, crm_join_none);
+
+ if(node && node->uname) {
+ if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
+ pcmk__str_casei)) {
+ crm_err("We're not part of the cluster anymore");
+ register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ } else if (!AM_I_DC
+ && pcmk__str_eq(node->uname, controld_globals.dc_name,
+ pcmk__str_casei)) {
+ crm_warn("Our DC node (%s) left the cluster", node->uname);
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ }
+ }
+
+ if ((controld_globals.fsa_state == S_INTEGRATION)
+ || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
+ check_join_state(controld_globals.fsa_state, __func__);
+ }
+ if ((node != NULL) && (node->uuid != NULL)) {
+ fail_incompletable_actions(controld_globals.transition_graph,
+ node->uuid);
+ }
+ }
+}
+
+void
+post_cache_update(int instance)
+{
+ xmlNode *no_op = NULL;
+
+ crm_peer_seq = instance;
+ crm_debug("Updated cache after membership event %d.", instance);
+
+ g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
+ controld_set_fsa_input_flags(R_MEMBERSHIP);
+
+ if (AM_I_DC) {
+ populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
+ node_update_expected, __func__);
+ }
+
+    /*
+     * If we lost nodes, we should re-check the election status.
+     * This is safe to call outside of an election.
+     */
+ controld_set_fsa_action_flags(A_ELECTION_CHECK);
+ controld_trigger_fsa();
+
+    /* Membership changed, so remind everyone we're here.
+     * This will aid detection of duplicate DCs.
+     */
+ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
+ AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
+ send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
+ free_xml(no_op);
+}
+
+static void
+crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_trace("Node update %d complete", call_id);
+
+ } else if(call_id < pcmk_ok) {
+ crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ } else {
+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create an XML node state tag with updates
+ *
+ * \param[in,out] node Node whose state will be used for update
+ * \param[in] flags Bitmask of node_update_flags indicating what to update
+ * \param[in,out] parent XML node to contain update (or NULL)
+ * \param[in] source Who requested the update (only used for logging)
+ *
+ * \return Pointer to created node state tag, or NULL if none was created
+ */
+xmlNode *
+create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
+ const char *source)
+{
+ const char *value = NULL;
+ xmlNode *node_state;
+
+ if (!node->state) {
+ crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
+ return NULL;
+ }
+
+ node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
+
+ if (pcmk_is_set(node->flags, crm_remote_node)) {
+ pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true);
+ }
+
+ set_uuid(node_state, XML_ATTR_ID, node);
+
+ if (crm_element_value(node_state, XML_ATTR_ID) == NULL) {
+ crm_info("Node update for %s cancelled: no id", node->uname);
+ free_xml(node_state);
+ return NULL;
+ }
+
+ crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
+
+ if ((flags & node_update_cluster) && node->state) {
+ pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER,
+ pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei));
+ }
+
+ if (!pcmk_is_set(node->flags, crm_remote_node)) {
+ if (flags & node_update_peer) {
+ value = OFFLINESTATUS;
+ if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
+ value = ONLINESTATUS;
+ }
+ crm_xml_add(node_state, XML_NODE_IS_PEER, value);
+ }
+
+ if (flags & node_update_join) {
+ if (node->join <= crm_join_none) {
+ value = CRMD_JOINSTATE_DOWN;
+ } else {
+ value = CRMD_JOINSTATE_MEMBER;
+ }
+ crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
+ }
+
+ if (flags & node_update_expected) {
+ crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
+ }
+ }
+
+ crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
+
+ return node_state;
+}
+
+static void
+remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
+ xmlNode * output, void *user_data)
+{
+ char *node_uuid = user_data;
+
+ do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
+ "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
+ node_uuid, pcmk_strerror(rc), rc);
+}
+
+static void
+search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
+ xmlNode * output, void *user_data)
+{
+ char *new_node_uuid = user_data;
+ xmlNode *node_xml = NULL;
+
+ if (rc != pcmk_ok) {
+ if (rc != -ENXIO) {
+ crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
+ new_node_uuid, pcmk_strerror(rc), rc);
+ }
+ return;
+
+ } else if (output == NULL) {
+ return;
+ }
+
+ if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) {
+ node_xml = output;
+
+ } else {
+ node_xml = pcmk__xml_first_child(output);
+ }
+
+ for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) {
+ const char *node_uuid = NULL;
+ const char *node_uname = NULL;
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ gboolean known = FALSE;
+
+ if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) {
+ continue;
+ }
+
+ node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
+ node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
+
+ if (node_uuid == NULL || node_uname == NULL) {
+ continue;
+ }
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ if (node->uuid
+ && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
+ && node->uname
+ && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {
+
+ known = TRUE;
+ break;
+ }
+ }
+
+ if (known == FALSE) {
+ cib_t *cib_conn = controld_globals.cib_conn;
+ int delete_call_id = 0;
+ xmlNode *node_state_xml = NULL;
+
+ crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
+ node_uuid, node_uname, new_node_uuid);
+
+ delete_call_id = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_NODES,
+ node_xml, cib_scope_local);
+ fsa_register_cib_callback(delete_call_id, strdup(node_uuid),
+ remove_conflicting_node_callback);
+
+ node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
+ crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
+ crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
+
+ delete_call_id = cib_conn->cmds->remove(cib_conn,
+ XML_CIB_TAG_STATUS,
+ node_state_xml,
+ cib_scope_local);
+ fsa_register_cib_callback(delete_call_id, strdup(node_uuid),
+ remove_conflicting_node_callback);
+ free_xml(node_state_xml);
+ }
+ }
+}
+
+static void
+node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if(call_id < pcmk_ok) {
+ crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
+ crm_log_xml_debug(msg, "update:failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+ } else if(rc < pcmk_ok) {
+ crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+ crm_log_xml_debug(msg, "update:failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+void
+populate_cib_nodes(enum node_update_flags flags, const char *source)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ int call_id = 0;
+ gboolean from_hashtable = TRUE;
+ xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
+
+#if SUPPORT_COROSYNC
+ if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) {
+ from_hashtable = pcmk__corosync_add_nodes(node_list);
+ }
+#endif
+
+ if (from_hashtable) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ GString *xpath = NULL;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *new_node = NULL;
+
+ if ((node->uuid != NULL) && (node->uname != NULL)) {
+ crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
+ if (xpath == NULL) {
+ xpath = g_string_sized_new(512);
+ } else {
+ g_string_truncate(xpath, 0);
+ }
+
+ /* We need both to be valid */
+ new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
+ crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
+ crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
+
+ /* Search and remove unknown nodes with the conflicting uname from CIB */
+ pcmk__g_strcat(xpath,
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
+ "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
+ "[@" XML_ATTR_UNAME "='", node->uname, "']"
+ "[@" XML_ATTR_ID "!='", node->uuid, "']", NULL);
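+
+                /* E.g., for a node named "node1" with ID "1", the resulting
+                 * query is (assuming the usual CIB schema names):
+                 *   /cib/configuration/nodes/node[@uname='node1'][@id!='1']
+                 */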
+
+ call_id = cib_conn->cmds->query(cib_conn,
+ (const char *) xpath->str,
+ NULL,
+ cib_scope_local|cib_xpath);
+ fsa_register_cib_callback(call_id, strdup(node->uuid),
+ search_conflicting_node_callback);
+ }
+ }
+
+ if (xpath != NULL) {
+ g_string_free(xpath, TRUE);
+ }
+ }
+
+ crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
+
+ if ((controld_update_cib(XML_CIB_TAG_NODES, node_list, cib_scope_local,
+ node_list_update_callback) == pcmk_rc_ok)
+ && (crm_peer_cache != NULL) && AM_I_DC) {
+ /*
+ * There is no need to update the local CIB with our values if
+ * we've not seen valid membership data
+ */
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+
+ free_xml(node_list);
+ node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ create_node_state_update(node, flags, node_list, source);
+ }
+
+ if (crm_remote_peer_cache) {
+ g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ create_node_state_update(node, flags, node_list, source);
+ }
+ }
+
+ controld_update_cib(XML_CIB_TAG_STATUS, node_list, cib_scope_local,
+ crmd_node_update_complete);
+ }
+ free_xml(node_list);
+}
+
+static void
+cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ fsa_data_t *msg_data = NULL;
+
+ if (rc == pcmk_ok) {
+ crm_trace("Quorum update %d complete", call_id);
+
+ } else {
+ crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
+ crm_log_xml_debug(msg, "failed");
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+void
+crm_update_quorum(gboolean quorum, gboolean force_update)
+{
+ bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);
+
+ if (quorum) {
+ controld_set_global_flags(controld_ever_had_quorum);
+
+ } else if (pcmk_all_flags_set(controld_globals.flags,
+ controld_ever_had_quorum
+ |controld_no_quorum_suicide)) {
+ pcmk__panic(__func__);
+ }
+
+ if (AM_I_DC
+ && ((has_quorum && !quorum) || (!has_quorum && quorum)
+ || force_update)) {
+ xmlNode *update = NULL;
+
+ update = create_xml_node(NULL, XML_TAG_CIB);
+ crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
+ crm_xml_add(update, XML_ATTR_DC_UUID, controld_globals.our_uuid);
+
+ crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
+ controld_update_cib(XML_TAG_CIB, update, cib_scope_local,
+ cib_quorum_update_complete);
+ free_xml(update);
+
+ /* Quorum changes usually cause a new transition via other activity:
+ * quorum gained via a node joining will abort via the node join,
+ * and quorum lost via a node leaving will usually abort via resource
+ * activity and/or fencing.
+ *
+ * However, it is possible that nothing else causes a transition (e.g.
+ * someone forces quorum via corosync-cmapctl, or quorum is lost due to
+ * a node in standby shutting down cleanly), so here ensure a new
+ * transition is triggered.
+ */
+ if (quorum) {
+ /* If quorum was gained, abort after a short delay, in case multiple
+ * nodes are joining around the same time, so the one that brings us
+ * to quorum doesn't cause all the remaining ones to be fenced.
+ */
+ abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
+ 5000);
+ } else {
+ abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
+ NULL);
+ }
+ }
+
+ if (quorum) {
+ controld_set_global_flags(controld_has_quorum);
+ } else {
+ controld_clear_global_flags(controld_has_quorum);
+ }
+}
diff --git a/daemons/controld/controld_membership.h b/daemons/controld/controld_membership.h
new file mode 100644
index 0000000..cfe8cee
--- /dev/null
+++ b/daemons/controld/controld_membership.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef MEMBERSHIP__H
+# define MEMBERSHIP__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <crm/cluster/internal.h>
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+void controld_destroy_failed_sync_table(void);
+void controld_remove_failed_sync_node(const char *node_name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
new file mode 100644
index 0000000..54b27ec
--- /dev/null
+++ b/daemons/controld/controld_messages.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cib.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+extern void crm_shutdown(int nsig);
+
+static enum crmd_fsa_input handle_message(xmlNode *msg,
+ enum crmd_fsa_cause cause);
+static void handle_response(xmlNode *stored_msg);
+static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
+ enum crmd_fsa_cause cause);
+static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
+static void send_msg_via_ipc(xmlNode * msg, const char *sys);
+
+/* Debug only; it's harmless if this counter wraps */
+static int last_data_id = 0;
+
+void
+register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ fsa_data_t * cur_data, void *new_data, const char *raised_from)
+{
+ /* save the current actions if any */
+ if (controld_globals.fsa_actions != A_NOTHING) {
+ register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
+ I_NULL, cur_data ? cur_data->data : NULL,
+ controld_globals.fsa_actions, TRUE, __func__);
+ }
+
+ /* reset the action list */
+ crm_info("Resetting the current action list");
+ fsa_dump_actions(controld_globals.fsa_actions, "Drop");
+ controld_globals.fsa_actions = A_NOTHING;
+
+ /* register the error */
+ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
+}
+
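+/* Most callers use the register_fsa_input() and register_fsa_input_later()
+ * convenience wrappers (see controld_fsa.h) rather than calling this
+ * directly, for example:
+ *
+ *     register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+ */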
+void
+register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ void *data, uint64_t with_actions,
+ gboolean prepend, const char *raised_from)
+{
+ unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
+ fsa_data_t *fsa_data = NULL;
+
+ if (raised_from == NULL) {
+ raised_from = "<unknown>";
+ }
+
+ if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
+ /* no point doing anything */
+ crm_err("Cannot add entry to queue: no input and no action");
+ return;
+ }
+
+ if (input == I_WAIT_FOR_EVENT) {
+ controld_set_global_flags(controld_fsa_is_stalled);
+ crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
+ raised_from, fsa_cause2string(cause), data, old_len);
+
+ if (old_len > 0) {
+ fsa_dump_queue(LOG_TRACE);
+ prepend = FALSE;
+ }
+
+ if (data == NULL) {
+ controld_set_fsa_action_flags(with_actions);
+ fsa_dump_actions(with_actions, "Restored");
+ return;
+ }
+
+ /* Store everything in the new event and reset
+ * controld_globals.fsa_actions
+ */
+ with_actions |= controld_globals.fsa_actions;
+ controld_globals.fsa_actions = A_NOTHING;
+ }
+
+ last_data_id++;
+ crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
+ raised_from, (prepend? "prepended" : "appended"), last_data_id,
+ fsa_input2string(input), fsa_cause2string(cause),
+ (data? "with" : "without"));
+
+ fsa_data = calloc(1, sizeof(fsa_data_t));
+ fsa_data->id = last_data_id;
+ fsa_data->fsa_input = input;
+ fsa_data->fsa_cause = cause;
+ fsa_data->origin = raised_from;
+ fsa_data->data = NULL;
+ fsa_data->data_type = fsa_dt_none;
+ fsa_data->actions = with_actions;
+
+ if (with_actions != A_NOTHING) {
+ crm_trace("Adding actions %.16llx to input",
+ (unsigned long long) with_actions);
+ }
+
+ if (data != NULL) {
+ switch (cause) {
+ case C_FSA_INTERNAL:
+ case C_CRMD_STATUS_CALLBACK:
+ case C_IPC_MESSAGE:
+ case C_HA_MESSAGE:
+ CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
+ crm_err("Bogus data from %s", raised_from));
+ crm_trace("Copying %s data from %s as cluster message data",
+ fsa_cause2string(cause), raised_from);
+ fsa_data->data = copy_ha_msg_input(data);
+ fsa_data->data_type = fsa_dt_ha_msg;
+ break;
+
+ case C_LRM_OP_CALLBACK:
+ crm_trace("Copying %s data from %s as lrmd_event_data_t",
+ fsa_cause2string(cause), raised_from);
+ fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
+ fsa_data->data_type = fsa_dt_lrm;
+ break;
+
+ case C_TIMER_POPPED:
+ case C_SHUTDOWN:
+ case C_UNKNOWN:
+ case C_STARTUP:
+ crm_crit("Copying %s data (from %s) is not yet implemented",
+ fsa_cause2string(cause), raised_from);
+ crmd_exit(CRM_EX_SOFTWARE);
+ break;
+ }
+ }
+
+ /* make sure to free it properly later */
+ if (prepend) {
+ controld_globals.fsa_message_queue
+ = g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
+ } else {
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
+ }
+
+ crm_trace("FSA message queue length is %d",
+ g_list_length(controld_globals.fsa_message_queue));
+
+ /* fsa_dump_queue(LOG_TRACE); */
+
+ if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
+ crm_err("Couldn't add message to the queue");
+ }
+
+ if (input != I_WAIT_FOR_EVENT) {
+ controld_trigger_fsa();
+ }
+}
+
+void
+fsa_dump_queue(int log_level)
+{
+ int offset = 0;
+
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *data = (fsa_data_t *) iter->data;
+
+ do_crm_log_unlikely(log_level,
+ "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
+ offset++, data->id, fsa_input2string(data->fsa_input),
+ data->origin, data->data, data->data_type,
+ fsa_cause2string(data->fsa_cause));
+ }
+}
+
+ha_msg_input_t *
+copy_ha_msg_input(ha_msg_input_t * orig)
+{
+ ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
+
+ CRM_ASSERT(copy != NULL);
+ copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
+ copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
+ return copy;
+}
+
+void
+delete_fsa_input(fsa_data_t * fsa_data)
+{
+ lrmd_event_data_t *op = NULL;
+ xmlNode *foo = NULL;
+
+ if (fsa_data == NULL) {
+ return;
+ }
+ crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
+
+ if (fsa_data->data != NULL) {
+ switch (fsa_data->data_type) {
+ case fsa_dt_ha_msg:
+ delete_ha_msg_input(fsa_data->data);
+ break;
+
+ case fsa_dt_xml:
+ foo = fsa_data->data;
+ free_xml(foo);
+ break;
+
+ case fsa_dt_lrm:
+ op = (lrmd_event_data_t *) fsa_data->data;
+ lrmd_free_event(op);
+ break;
+
+ case fsa_dt_none:
+ if (fsa_data->data != NULL) {
+ crm_err("Don't know how to free %s data from %s",
+ fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+ crmd_exit(CRM_EX_SOFTWARE);
+ }
+ break;
+ }
+ crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
+ }
+
+ free(fsa_data);
+}
+
+/* returns the next message */
+fsa_data_t *
+get_message(void)
+{
+ fsa_data_t *message
+ = (fsa_data_t *) controld_globals.fsa_message_queue->data;
+
+ controld_globals.fsa_message_queue
+ = g_list_remove(controld_globals.fsa_message_queue, message);
+ crm_trace("Processing input %d", message->id);
+ return message;
+}
+
+void *
+fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
+{
+ void *ret_val = NULL;
+
+ if (fsa_data == NULL) {
+ crm_err("%s: No FSA data available", caller);
+
+ } else if (fsa_data->data == NULL) {
+ crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
+
+ } else if (fsa_data->data_type != a_type) {
+ crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
+ caller, fsa_data->data_type, a_type, fsa_data->origin);
+ CRM_ASSERT(fsa_data->data_type == a_type);
+ } else {
+ ret_val = fsa_data->data;
+ }
+
+ return ret_val;
+}
+
+/* A_MSG_ROUTE */
+void
+do_msg_route(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+
+ route_message(msg_data->fsa_cause, input->msg);
+}
+
+void
+route_message(enum crmd_fsa_cause cause, xmlNode * input)
+{
+ ha_msg_input_t fsa_input;
+ enum crmd_fsa_input result = I_NULL;
+
+ fsa_input.msg = input;
+ CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
+
+ /* try passing the buck first */
+ if (relay_message(input, cause == C_IPC_MESSAGE)) {
+ return;
+ }
+
+ /* handle locally */
+ result = handle_message(input, cause);
+
+ /* done or process later? */
+ switch (result) {
+ case I_NULL:
+ case I_CIB_OP:
+ case I_ROUTER:
+ case I_NODE_JOIN:
+ case I_JOIN_REQUEST:
+ case I_JOIN_RESULT:
+ break;
+ default:
+            /* Deferring local processing of message */
+ register_fsa_input_later(cause, result, &fsa_input);
+ return;
+ }
+
+ if (result != I_NULL) {
+ /* add to the front of the queue */
+ register_fsa_input(cause, result, &fsa_input);
+ }
+}
+
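+/*!
+ * \internal
+ * \brief Route or relay a message toward its intended destination
+ *
+ * \param[in,out] msg                 Message to relay
+ * \param[in]     originated_locally  Whether the message originated on this
+ *                                    node (for example, via IPC)
+ *
+ * \return TRUE if the message was relayed, discarded, or invalid (no further
+ *         processing needed), or FALSE if the caller must handle it locally
+ */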
+gboolean
+relay_message(xmlNode * msg, gboolean originated_locally)
+{
+ int dest = 1;
+ bool is_for_dc = false;
+ bool is_for_dcib = false;
+ bool is_for_te = false;
+ bool is_for_crm = false;
+ bool is_for_cib = false;
+ bool is_local = false;
+    const char *host_to = NULL;
+    const char *sys_to = NULL;
+    const char *sys_from = NULL;
+    const char *type = NULL;
+    const char *task = NULL;
+    const char *ref = NULL;
+
+    if (msg == NULL) {
+        /* Check this before extracting any fields from msg */
+        crm_warn("Cannot route empty message");
+        return TRUE;
+    }
+
+    host_to = crm_element_value(msg, F_CRM_HOST_TO);
+    sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+    sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+    type = crm_element_value(msg, F_TYPE);
+    task = crm_element_value(msg, F_CRM_TASK);
+    ref = crm_element_value(msg, XML_ATTR_REFERENCE);
+
+    if (ref == NULL) {
+        ref = "without reference ID";
+    }
+
+    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
+ crm_trace("No routing needed for hello message %s", ref);
+ return TRUE;
+
+ } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
+ crm_warn("Received invalid message %s: type '%s' not '" T_CRM "'",
+ ref, pcmk__s(type, ""));
+ crm_log_xml_warn(msg, "[bad message type]");
+ return TRUE;
+
+ } else if (sys_to == NULL) {
+ crm_warn("Received invalid message %s: no subsystem", ref);
+ crm_log_xml_warn(msg, "[no subsystem]");
+ return TRUE;
+ }
+
+ is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
+ is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
+ is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
+ is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
+ is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
+
+ is_local = false;
+ if (pcmk__str_empty(host_to)) {
+ if (is_for_dc || is_for_te) {
+ is_local = false;
+
+ } else if (is_for_crm) {
+ if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
+ PCMK__CONTROLD_CMD_NODES, NULL)) {
+ /* Node info requests do not specify a host, which is normally
+ * treated as "all hosts", because the whole point is that the
+ * client may not know the local node name. Always handle these
+ * requests locally.
+ */
+ is_local = true;
+ } else {
+ is_local = !originated_locally;
+ }
+
+ } else {
+ is_local = true;
+ }
+
+ } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
+ pcmk__str_casei)) {
+ is_local = true;
+ } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
+ xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
+ const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
+
+ if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+ // Local delete of an offline node's resource history
+ is_local = true;
+ }
+ }
+
+ if (is_for_dc || is_for_dcib || is_for_te) {
+ if (AM_I_DC && is_for_te) {
+ crm_trace("Route message %s locally as transition request", ref);
+ send_msg_via_ipc(msg, sys_to);
+
+ } else if (AM_I_DC) {
+ crm_trace("Route message %s locally as DC request", ref);
+ return FALSE; // More to be done by caller
+
+ } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
+ CRM_SYSTEM_TENGINE, NULL)) {
+
+ if (is_corosync_cluster()) {
+ dest = text2msg_type(sys_to);
+ }
+ crm_trace("Relay message %s to DC", ref);
+ send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
+
+ } else {
+ /* Neither the TE nor the scheduler should be sending messages
+ * to DCs on other nodes. By definition, if we are no longer the DC,
+ * then the scheduler's or TE's data should be discarded.
+ */
+ crm_trace("Discard message %s because we are not DC", ref);
+ }
+
+ } else if (is_local && (is_for_crm || is_for_cib)) {
+ crm_trace("Route message %s locally as controller request", ref);
+ return FALSE; // More to be done by caller
+
+ } else if (is_local) {
+ crm_trace("Relay message %s locally to %s",
+ ref, (sys_to? sys_to : "unknown client"));
+ crm_log_xml_trace(msg, "[IPC relay]");
+ send_msg_via_ipc(msg, sys_to);
+
+ } else {
+ crm_node_t *node_to = NULL;
+
+ if (is_corosync_cluster()) {
+ dest = text2msg_type(sys_to);
+
+ if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
+ dest = crm_msg_crmd;
+ }
+ }
+
+ if (host_to) {
+ node_to = pcmk__search_cluster_node_cache(0, host_to);
+ if (node_to == NULL) {
+ crm_warn("Cannot route message %s: Unknown node %s",
+ ref, host_to);
+ return TRUE;
+ }
+ crm_trace("Relay message %s to %s",
+ ref, (node_to->uname? node_to->uname : "peer"));
+ } else {
+ crm_trace("Broadcast message %s to all peers", ref);
+ }
+ send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
+ }
+
+ return TRUE; // No further processing of message is needed
+}
+
+// Return true if field contains a non-negative integer
+static bool
+authorize_version(xmlNode *message_data, const char *field,
+ const char *client_name, const char *ref, const char *uuid)
+{
+ const char *version = crm_element_value(message_data, field);
+ long long version_num;
+
+ if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
+ || (version_num < 0LL)) {
+
+ crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
+ CRM_XS " ref=%s uuid=%s",
+ client_name, ((version == NULL)? "" : version),
+ field, (ref? ref : "none"), uuid);
+ return false;
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a client IPC message is acceptable
+ *
+ * If a given client IPC message is a hello, "authorize" it by ensuring it has
+ * valid information such as a protocol version, and return false indicating
+ * that nothing further needs to be done with the message. If the message is not
+ * a hello, just return true to indicate it needs further processing.
+ *
+ * \param[in] client_msg XML of IPC message
+ * \param[in,out] curr_client If IPC is not proxied, client that sent message
+ * \param[in] proxy_session If IPC is proxied, the session ID
+ *
+ * \return true if message needs further processing, false if it doesn't
+ */
+bool
+controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
+ const char *proxy_session)
+{
+ xmlNode *message_data = NULL;
+ const char *client_name = NULL;
+ const char *op = crm_element_value(client_msg, F_CRM_TASK);
+ const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE);
+ const char *uuid = (curr_client? curr_client->id : proxy_session);
+
+ if (uuid == NULL) {
+ crm_warn("IPC message from client rejected: No client identifier "
+ CRM_XS " ref=%s", (ref? ref : "none"));
+ goto rejected;
+ }
+
+ if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
+ // Only hello messages need to be authorized
+ return true;
+ }
+
+ message_data = get_message_xml(client_msg, F_CRM_DATA);
+
+ client_name = crm_element_value(message_data, "client_name");
+ if (pcmk__str_empty(client_name)) {
+ crm_warn("IPC hello from client rejected: No client name",
+ CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
+ goto rejected;
+ }
+ if (!authorize_version(message_data, "major_version", client_name, ref,
+ uuid)) {
+ goto rejected;
+ }
+ if (!authorize_version(message_data, "minor_version", client_name, ref,
+ uuid)) {
+ goto rejected;
+ }
+
+ crm_trace("Validated IPC hello from client %s", client_name);
+ if (curr_client) {
+ curr_client->userdata = strdup(client_name);
+ }
+ controld_trigger_fsa();
+ return false;
+
+rejected:
+ if (curr_client) {
+ qb_ipcs_disconnect(curr_client->ipcs);
+ }
+ return false;
+}
+
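+/*!
+ * \brief Dispatch a message to the request or response handler
+ *
+ * \param[in,out] msg    Message XML
+ * \param[in]     cause  How the message arrived (IPC or cluster message)
+ *
+ * \return Next FSA input
+ */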
+static enum crmd_fsa_input
+handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
+{
+ const char *type = NULL;
+
+ CRM_CHECK(msg != NULL, return I_NULL);
+
+ type = crm_element_value(msg, F_CRM_MSG_TYPE);
+ if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) {
+ return handle_request(msg, cause);
+
+ } else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) {
+ handle_response(msg);
+ return I_NULL;
+ }
+
+ crm_err("Unknown message type: %s", type);
+ return I_NULL;
+}
+
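+/*!
+ * \brief Handle a CRM_OP_CLEAR_FAILCOUNT message by clearing failures from
+ *        the attribute manager, the CIB, and executor state
+ *
+ * \param[in] stored_msg  Message XML
+ *
+ * \return Next FSA input
+ */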
+static enum crmd_fsa_input
+handle_failcount_op(xmlNode * stored_msg)
+{
+ const char *rsc = NULL;
+ const char *uname = NULL;
+ const char *op = NULL;
+ char *interval_spec = NULL;
+ guint interval_ms = 0;
+ gboolean is_remote_node = FALSE;
+ xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
+
+ if (xml_op) {
+ xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
+ xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
+
+ if (xml_rsc) {
+ rsc = ID(xml_rsc);
+ }
+ if (xml_attrs) {
+ op = crm_element_value(xml_attrs,
+ CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
+ crm_element_value_ms(xml_attrs,
+ CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL,
+ &interval_ms);
+ }
+ }
+ uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
+
+ if ((rsc == NULL) || (uname == NULL)) {
+ crm_log_xml_warn(stored_msg, "invalid failcount op");
+ return I_NULL;
+ }
+
+ if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
+ is_remote_node = TRUE;
+ }
+
+ crm_debug("Clearing failures for %s-interval %s on %s "
+ "from attribute manager, CIB, and executor state",
+ pcmk__readable_interval(interval_ms), rsc, uname);
+
+ if (interval_ms) {
+ interval_spec = crm_strdup_printf("%ums", interval_ms);
+ }
+ update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
+ free(interval_spec);
+
+ controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
+
+ lrm_clear_last_failure(rsc, uname, op, interval_ms);
+
+ return I_NULL;
+}
+
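+/*!
+ * \brief Handle a CRM_OP_LRM_DELETE message, either relaying it to the
+ *        affected node or clearing CIB resource history locally
+ *
+ * \param[in,out] stored_msg  Message XML
+ *
+ * \return Next FSA input
+ */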
+static enum crmd_fsa_input
+handle_lrm_delete(xmlNode *stored_msg)
+{
+ const char *mode = NULL;
+ xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
+
+ CRM_CHECK(msg_data != NULL, return I_NULL);
+
+ /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
+ * relay the operation to the affected node, which will unregister the
+ * resource from the local executor, clear the resource's history from the
+ * CIB, and do some bookkeeping in the controller.
+ *
+ * However, if the affected node is offline, the client will specify
+ * mode="cib" which means the controller receiving the operation should
+ * clear the resource's history from the CIB and nothing else. This is used
+ * to clear shutdown locks.
+ */
+ mode = crm_element_value(msg_data, PCMK__XA_MODE);
+ if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+ // Relay to affected node
+ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+ return I_ROUTER;
+
+ } else {
+ // Delete CIB history locally (compare with do_lrm_delete())
+ const char *from_sys = NULL;
+ const char *user_name = NULL;
+ const char *rsc_id = NULL;
+ const char *node = NULL;
+ xmlNode *rsc_xml = NULL;
+ int rc = pcmk_rc_ok;
+
+ rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
+ CRM_CHECK(rsc_xml != NULL, return I_NULL);
+
+ rsc_id = ID(rsc_xml);
+ from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
+ node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
+ user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
+ crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
+ "(clearing CIB resource history only)", rsc_id, node,
+ (user_name? " for user " : ""), (user_name? user_name : ""));
+ rc = controld_delete_resource_history(rsc_id, node, user_name,
+ cib_dryrun|cib_sync_call);
+ if (rc == pcmk_rc_ok) {
+ rc = controld_delete_resource_history(rsc_id, node, user_name,
+ crmd_cib_smart_opt());
+ }
+
+        /* Notify the client and the tengine (the tengine is notified here
+         * only when mode="cib" for CRM_OP_LRM_DELETE)
+         */
+ if (from_sys) {
+ lrmd_event_data_t *op = NULL;
+ const char *from_host = crm_element_value(stored_msg,
+ F_CRM_HOST_FROM);
+ const char *transition;
+
+ if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
+ transition = crm_element_value(msg_data,
+ XML_ATTR_TRANSITION_KEY);
+ } else {
+ transition = crm_element_value(stored_msg,
+ XML_ATTR_TRANSITION_KEY);
+ }
+
+ crm_info("Notifying %s on %s that %s was%s deleted",
+ from_sys, (from_host? from_host : "local node"), rsc_id,
+ ((rc == pcmk_rc_ok)? "" : " not"));
+ op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
+ op->type = lrmd_event_exec_complete;
+ op->user_data = strdup(transition? transition : FAKE_TE_ID);
+ op->params = pcmk__strkey_table(free, free);
+ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
+ strdup(CRM_FEATURE_SET));
+ controld_rc2event(op, rc);
+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+ lrmd_free_event(op);
+ controld_trigger_delete_refresh(from_sys, rsc_id);
+ }
+ return I_NULL;
+ }
+}
+
+/*!
+ * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_remote_state(const xmlNode *msg)
+{
+ const char *conn_host = NULL;
+ const char *remote_uname = ID(msg);
+ crm_node_t *remote_peer;
+ bool remote_is_up = false;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up);
+
+ CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
+
+ remote_peer = crm_remote_peer_get(remote_uname);
+ CRM_CHECK(remote_peer, return I_NULL);
+
+ pcmk__update_peer_state(__func__, remote_peer,
+ remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
+ 0);
+
+ conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
+ if (conn_host) {
+ pcmk__str_update(&remote_peer->conn_host, conn_host);
+ } else if (remote_peer->conn_host) {
+ free(remote_peer->conn_host);
+ remote_peer->conn_host = NULL;
+ }
+
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_PING message
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_ping(const xmlNode *msg)
+{
+ const char *value = NULL;
+ xmlNode *ping = NULL;
+ xmlNode *reply = NULL;
+
+ // Build reply
+
+ ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+
+ // Add controller state
+ value = fsa_state2string(controld_globals.fsa_state);
+ crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value);
+ crm_notice("Current ping state: %s", value); // CTS needs this
+
+ // Add controller health
+ // @TODO maybe do some checks to determine meaningful status
+ crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+
+ // Send reply
+ reply = create_reply(msg, ping);
+ free_xml(ping);
+ if (reply != NULL) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a PCMK__CONTROLD_CMD_NODES message
+ *
+ * \param[in] request Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_node_list(const xmlNode *request)
+{
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+ xmlNode *reply = NULL;
+ xmlNode *reply_data = NULL;
+
+ // Create message data for reply
+ reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
+ xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
+
+ crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t
+ crm_xml_add(xml, XML_ATTR_UNAME, node->uname);
+ crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state);
+ }
+
+ // Create and send reply
+ reply = create_reply(request, reply_data);
+ free_xml(reply_data);
+ if (reply) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_NODE_INFO request
+ *
+ * \param[in] msg Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_node_info_request(const xmlNode *msg)
+{
+ const char *value = NULL;
+ crm_node_t *node = NULL;
+ int node_id = 0;
+ xmlNode *reply = NULL;
+ xmlNode *reply_data = NULL;
+
+ // Build reply
+
+ reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE);
+ crm_xml_add(reply_data, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD);
+
+ // Add whether current partition has quorum
+ pcmk__xe_set_bool_attr(reply_data, XML_ATTR_HAVE_QUORUM,
+ pcmk_is_set(controld_globals.flags,
+ controld_has_quorum));
+
+ // Check whether client requested node info by ID and/or name
+ crm_element_value_int(msg, XML_ATTR_ID, &node_id);
+ if (node_id < 0) {
+ node_id = 0;
+ }
+ value = crm_element_value(msg, XML_ATTR_UNAME);
+
+ // Default to local node if none given
+ if ((node_id == 0) && (value == NULL)) {
+ value = controld_globals.our_nodename;
+ }
+
+ node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY);
+ if (node) {
+ crm_xml_add(reply_data, XML_ATTR_ID, node->uuid);
+ crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname);
+ crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state);
+ pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE,
+ pcmk_is_set(node->flags, crm_remote_node));
+ }
+
+ // Send reply
+ reply = create_reply(msg, reply_data);
+ free_xml(reply_data);
+ if (reply != NULL) {
+ (void) relay_message(reply, TRUE);
+ free_xml(reply);
+ }
+
+ // Nothing further to do
+ return I_NULL;
+}
+
+static void
+verify_feature_set(xmlNode *msg)
+{
+ const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION);
+
+ if (dc_version == NULL) {
+ /* All we really know is that the DC feature set is older than 3.1.0,
+ * but that's also all that really matters.
+ */
+ dc_version = "3.0.14";
+ }
+
+ if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
+ crm_trace("Local feature set (%s) is compatible with DC's (%s)",
+ CRM_FEATURE_SET, dc_version);
+ } else {
+ crm_err("Local feature set (%s) is incompatible with DC's (%s)",
+ CRM_FEATURE_SET, dc_version);
+
+ // Nothing is likely to improve without administrator involvement
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ crmd_exit(CRM_EX_FATAL);
+ }
+}
+
+// DC gets own shutdown all-clear
+static enum crmd_fsa_input
+handle_shutdown_self_ack(xmlNode *stored_msg)
+{
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ // The expected case -- we initiated own shutdown sequence
+ crm_info("Shutting down controller");
+ return I_STOP;
+ }
+
+ if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
+ // Must be logic error -- DC confirming its own unrequested shutdown
+ crm_err("Shutting down controller immediately due to "
+ "unexpected shutdown confirmation");
+ return I_TERMINATE;
+ }
+
+ if (controld_globals.fsa_state != S_STOPPING) {
+ // Shouldn't happen -- non-DC confirming unrequested shutdown
+ crm_err("Starting new DC election because %s is "
+ "confirming shutdown we did not request",
+ (host_from? host_from : "another node"));
+ return I_ELECTION;
+ }
+
+ // Shouldn't happen, but we are already stopping anyway
+ crm_debug("Ignoring unexpected shutdown confirmation from %s",
+ (host_from? host_from : "another node"));
+ return I_NULL;
+}
+
+// Non-DC gets shutdown all-clear from DC
+static enum crmd_fsa_input
+handle_shutdown_ack(xmlNode *stored_msg)
+{
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (host_from == NULL) {
+ crm_warn("Ignoring shutdown request without origin specified");
+ return I_NULL;
+ }
+
+ if (pcmk__str_eq(host_from, controld_globals.dc_name,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_info("Shutting down controller after confirmation from %s",
+ host_from);
+ } else {
+ crm_err("Shutting down controller after unexpected "
+ "shutdown request from %s", host_from);
+ controld_set_fsa_input_flags(R_STAYDOWN);
+ }
+ return I_STOP;
+ }
+
+ crm_warn("Ignoring shutdown request from %s because DC is %s",
+ host_from, controld_globals.dc_name);
+ return I_NULL;
+}
+
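+/*!
+ * \brief Dispatch a request to the handler appropriate for its task
+ *
+ * \param[in,out] stored_msg  Request XML
+ * \param[in]     cause       How the request arrived (IPC or cluster message)
+ *
+ * \return Next FSA input
+ */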
+static enum crmd_fsa_input
+handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
+{
+ xmlNode *msg = NULL;
+ const char *op = crm_element_value(stored_msg, F_CRM_TASK);
+
+ /* Optimize this for the DC - it has the most to do */
+
+ if (op == NULL) {
+ crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]");
+ return I_NULL;
+ }
+
+ if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
+ const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+ crm_node_t *node = pcmk__search_cluster_node_cache(0, from);
+
+ pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
+ if(AM_I_DC == FALSE) {
+ return I_NULL; /* Done */
+ }
+ }
+
+ /*========== DC-Only Actions ==========*/
+ if (AM_I_DC) {
+ if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
+ return I_NODE_JOIN;
+
+ } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
+ return I_JOIN_REQUEST;
+
+ } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
+ return I_JOIN_RESULT;
+
+ } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
+ return handle_shutdown_self_ack(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
+ // Another controller wants to shut down its node
+ return handle_shutdown_request(stored_msg);
+ }
+ }
+
+ /*========== common actions ==========*/
+ if (strcmp(op, CRM_OP_NOVOTE) == 0) {
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = stored_msg;
+ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
+ A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
+ __func__);
+
+ } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
+ /* a remote connection host is letting us know the node state */
+ return handle_remote_state(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
+ throttle_update(stored_msg);
+ if (AM_I_DC && (controld_globals.transition_graph != NULL)
+ && !controld_globals.transition_graph->complete) {
+
+ crm_debug("The throttle changed. Trigger a graph.");
+ trigger_graph();
+ }
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
+ return handle_failcount_op(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_VOTE) == 0) {
+ /* count the vote and decide what to do after that */
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = stored_msg;
+ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
+ A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
+ __func__);
+
+ /* Sometimes we _must_ go into S_ELECTION */
+ if (controld_globals.fsa_state == S_HALT) {
+ crm_debug("Forcing an election from S_HALT");
+ return I_ELECTION;
+#if 0
+ } else if (AM_I_DC) {
+ /* This is the old way of doing things but what is gained? */
+ return I_ELECTION;
+#endif
+ }
+
+ } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
+ verify_feature_set(stored_msg);
+ crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
+ return I_JOIN_OFFER;
+
+ } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
+ crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
+ return I_JOIN_RESULT;
+
+ } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
+ return handle_lrm_delete(stored_msg);
+
+ } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
+ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
+ || (strcmp(op, CRM_OP_REPROBE) == 0)) {
+
+ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+ return I_ROUTER;
+
+ } else if (strcmp(op, CRM_OP_NOOP) == 0) {
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
+
+ crm_shutdown(SIGTERM);
+ /*return I_SHUTDOWN; */
+ return I_NULL;
+
+ } else if (strcmp(op, CRM_OP_PING) == 0) {
+ return handle_ping(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
+ return handle_node_info_request(stored_msg);
+
+ } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
+ int id = 0;
+ const char *name = NULL;
+
+ crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
+ name = crm_element_value(stored_msg, XML_ATTR_UNAME);
+
+ if(cause == C_IPC_MESSAGE) {
+ msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
+ crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
+ } else {
+ crm_notice("Instructing peers to remove references to node %s/%u", name, id);
+ }
+ free_xml(msg);
+
+ } else {
+ reap_crm_member(id, name);
+
+ /* If we're forgetting this node, also forget any failures to fence
+ * it, so we don't carry that over to any node added later with the
+ * same name.
+ */
+ st_fail_count_reset(name);
+ }
+
+ } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
+ xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
+
+ remote_ra_process_maintenance_nodes(xml);
+
+ } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
+ return handle_node_list(stored_msg);
+
+ /*========== (NOT_DC)-Only Actions ==========*/
+ } else if (!AM_I_DC) {
+
+ if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
+ return handle_shutdown_ack(stored_msg);
+ }
+
+ } else {
+ crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
+ crm_log_xml_err(stored_msg, "Unexpected");
+ }
+
+ return I_NULL;
+}
+
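+/*!
+ * \brief Handle a response message
+ *
+ * \param[in] stored_msg  Response XML
+ */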
+static void
+handle_response(xmlNode *stored_msg)
+{
+ const char *op = crm_element_value(stored_msg, F_CRM_TASK);
+
+ if (op == NULL) {
+ crm_log_xml_err(stored_msg, "Bad message");
+
+ } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
+        // Check whether scheduler answer has been superseded by newer request
+ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
+
+ if (msg_ref == NULL) {
+ crm_err("%s - Ignoring calculation with no reference", op);
+
+ } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
+ pcmk__str_none)) {
+ ha_msg_input_t fsa_input;
+
+ controld_stop_sched_timer();
+ fsa_input.msg = stored_msg;
+ register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
+
+ } else {
+ crm_info("%s calculation %s is obsolete", op, msg_ref);
+ }
+
+ } else if (strcmp(op, CRM_OP_VOTE) == 0
+ || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
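+        // Nothing to do for these: they are just acknowledgements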
+
+ } else {
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
+ op, host_from, AM_I_DC ? "DC" : "controller");
+ }
+}
+
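+/*!
+ * \brief Handle a CRM_OP_SHUTDOWN_REQ message by setting the requester's
+ *        shutdown node attribute
+ *
+ * \param[in] stored_msg  Message XML
+ *
+ * \return Next FSA input
+ */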
+static enum crmd_fsa_input
+handle_shutdown_request(xmlNode * stored_msg)
+{
+    /* Handle the shutdown request here, rather than relaying it, to avoid
+     * potential version issues if the shutdown message or procedure changes
+     * in later versions.
+     *
+     * This way, the DC is always in control of the shutdown.
+     */
+
+ char *now_s = NULL;
+ const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
+
+ if (host_from == NULL) {
+        /* We are the DC and are shutting down ourselves */
+ host_from = controld_globals.our_nodename;
+ }
+
+ crm_info("Creating shutdown request for %s (state=%s)", host_from,
+ fsa_state2string(controld_globals.fsa_state));
+ crm_log_xml_trace(stored_msg, "message");
+
+ now_s = pcmk__ttoa(time(NULL));
+ update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
+ free(now_s);
+
+    /* The transition engine will pick this up as long as it's running */
+ return I_NULL;
+}
+
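+/*!
+ * \internal
+ * \brief Deliver a message locally to a named subsystem or client
+ *
+ * \param[in,out] msg  Message XML to deliver
+ * \param[in]     sys  Name of subsystem (or client ID) to deliver message to
+ */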
+static void
+send_msg_via_ipc(xmlNode * msg, const char *sys)
+{
+ pcmk__client_t *client_channel = NULL;
+
+ CRM_CHECK(sys != NULL, return);
+
+ client_channel = pcmk__find_client_by_id(sys);
+
+ if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
+ crm_xml_add(msg, F_CRM_HOST_FROM, controld_globals.our_nodename);
+ }
+
+ if (client_channel != NULL) {
+ /* Transient clients such as crmadmin */
+ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
+
+ } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ xmlNode *data = get_message_xml(msg, F_CRM_DATA);
+
+ process_te_message(msg, data);
+
+ } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+ fsa_data_t fsa_data;
+ ha_msg_input_t fsa_input;
+
+ fsa_input.msg = msg;
+ fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
+
+ fsa_data.id = 0;
+ fsa_data.actions = 0;
+ fsa_data.data = &fsa_input;
+ fsa_data.fsa_input = I_MESSAGE;
+ fsa_data.fsa_cause = C_IPC_MESSAGE;
+ fsa_data.origin = __func__;
+ fsa_data.data_type = fsa_dt_ha_msg;
+
+ do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
+ I_MESSAGE, &fsa_data);
+
+ } else if (crmd_is_proxy_session(sys)) {
+ crmd_proxy_send(sys, msg);
+
+ } else {
+ crm_info("Received invalid request: unknown subsystem '%s'", sys);
+ }
+}
+
+void
+delete_ha_msg_input(ha_msg_input_t * orig)
+{
+ if (orig == NULL) {
+ return;
+ }
+ free_xml(orig->msg);
+ free(orig);
+}
+
+/*!
+ * \internal
+ * \brief Notify the cluster of a remote node state change
+ *
+ * \param[in] node_name Node's name
+ * \param[in] node_up true if node is up, false if down
+ */
+void
+broadcast_remote_state_message(const char *node_name, bool node_up)
+{
+ xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+
+ crm_info("Notifying cluster of Pacemaker Remote node %s %s",
+ node_name, node_up? "coming up" : "going down");
+
+ crm_xml_add(msg, XML_ATTR_ID, node_name);
+ pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up);
+
+ if (node_up) {
+ crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename);
+ }
+
+ send_cluster_message(NULL, crm_msg_crmd, msg, TRUE);
+ free_xml(msg);
+}
+
diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h
new file mode 100644
index 0000000..4108961
--- /dev/null
+++ b/daemons/controld/controld_messages.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef XML_CRM_MESSAGES__H
+# define XML_CRM_MESSAGES__H
+
+# include <crm/crm.h>
+# include <crm/common/ipc_internal.h>
+# include <crm/common/xml.h>
+# include <crm/cluster/internal.h>
+# include <controld_fsa.h>
+
+typedef struct ha_msg_input_s {
+ xmlNode *msg;
+ xmlNode *xml;
+
+} ha_msg_input_t;
+
+extern void delete_ha_msg_input(ha_msg_input_t * orig);
+
+extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type,
+ const char *caller);
+
+# define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __func__)
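+
+/* For example (a minimal sketch): within an FSA action handler, where
+ * msg_data is in scope, typed message data can be retrieved as
+ *
+ *     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ */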
+
+extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+ fsa_data_t * cur_data, void *new_data, const char *raised_from);
+
+#define register_fsa_error(cause, input, new_data) \
+ register_fsa_error_adv(cause, input, msg_data, new_data, __func__)
+
+void register_fsa_input_adv(enum crmd_fsa_cause cause,
+ enum crmd_fsa_input input, void *data,
+ uint64_t with_actions, gboolean prepend,
+ const char *raised_from);
+
+extern void fsa_dump_queue(int log_level);
+extern void route_message(enum crmd_fsa_cause cause, xmlNode * input);
+
+# define crmd_fsa_stall(suppress) do { \
+ if(suppress == FALSE && msg_data != NULL) { \
+ register_fsa_input_adv( \
+ ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT, \
+ ((fsa_data_t*)msg_data)->data, action, TRUE, __func__); \
+ } else { \
+ register_fsa_input_adv( \
+ C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \
+ NULL, action, TRUE, __func__); \
+ } \
+ } while(0)
+
+#define register_fsa_input(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__)
+
+#define register_fsa_input_before(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __func__)
+
+#define register_fsa_input_later(cause, input, data) \
+ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__)
+
+void delete_fsa_input(fsa_data_t * fsa_data);
+
+fsa_data_t *get_message(void);
+
+extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally);
+
+gboolean crmd_is_proxy_session(const char *session);
+void crmd_proxy_send(const char *session, xmlNode *msg);
+
+bool controld_authorize_ipc_message(const xmlNode *client_msg,
+ pcmk__client_t *curr_client,
+ const char *proxy_session);
+
+extern gboolean send_request(xmlNode * msg, char **msg_reference);
+
+extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
+
+void broadcast_remote_state_message(const char *node_name, bool node_up);
+
+#endif
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
new file mode 100644
index 0000000..240a978
--- /dev/null
+++ b/daemons/controld/controld_metadata.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2017-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <glib.h>
+#include <regex.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h>
+
+#include <pacemaker-controld.h>
+
+static void
+ra_param_free(void *param)
+{
+ if (param) {
+ struct ra_param_s *p = (struct ra_param_s *) param;
+
+ if (p->rap_name) {
+ free(p->rap_name);
+ }
+ free(param);
+ }
+}
+
+static void
+metadata_free(void *metadata)
+{
+ if (metadata) {
+ struct ra_metadata_s *md = (struct ra_metadata_s *) metadata;
+
+ g_list_free_full(md->ra_params, ra_param_free);
+ free(metadata);
+ }
+}
+
+GHashTable *
+metadata_cache_new(void)
+{
+ return pcmk__strkey_table(free, metadata_free);
+}
+
+void
+metadata_cache_free(GHashTable *mdc)
+{
+ if (mdc) {
+ crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc));
+ g_hash_table_destroy(mdc);
+ }
+}
+
+void
+metadata_cache_reset(GHashTable *mdc)
+{
+ if (mdc) {
+ crm_trace("Resetting metadata cache with %d members",
+ g_hash_table_size(mdc));
+ g_hash_table_remove_all(mdc);
+ }
+}
+
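+/*!
+ * \internal
+ * \brief Allocate a new parameter entry from agent meta-data XML
+ *
+ * \param[in] param_xml  XML of a single meta-data parameter
+ *
+ * \return Newly allocated parameter entry on success, otherwise NULL
+ */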
+static struct ra_param_s *
+ra_param_from_xml(xmlNode *param_xml)
+{
+ const char *param_name = crm_element_value(param_xml, "name");
+ struct ra_param_s *p;
+
+ p = calloc(1, sizeof(struct ra_param_s));
+ if (p == NULL) {
+ return NULL;
+ }
+
+ p->rap_name = strdup(param_name);
+ if (p->rap_name == NULL) {
+ free(p);
+ return NULL;
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "reloadable")) {
+ controld_set_ra_param_flags(p, ra_param_reloadable);
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "unique")) {
+ controld_set_ra_param_flags(p, ra_param_unique);
+ }
+
+ if (pcmk__xe_attr_is_true(param_xml, "private")) {
+ controld_set_ra_param_flags(p, ra_param_private);
+ }
+ return p;
+}
+
+static void
+log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version)
+{
+ if (pcmk__str_empty(ra_ocf_version)) {
+ crm_warn("%s does not advertise OCF version supported", ra_key);
+
+ } else if (compare_version(ra_ocf_version, "2") >= 0) {
+ crm_warn("%s supports OCF version %s (this Pacemaker version supports "
+ PCMK_OCF_VERSION " and might not work properly with agent)",
+ ra_key, ra_ocf_version);
+
+ } else if (compare_version(ra_ocf_version, PCMK_OCF_VERSION) > 0) {
+ crm_info("%s supports OCF version %s (this Pacemaker version supports "
+ PCMK_OCF_VERSION " and might not use all agent features)",
+ ra_key, ra_ocf_version);
+
+ } else {
+ crm_debug("%s supports OCF version %s", ra_key, ra_ocf_version);
+ }
+}
+
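+/*!
+ * \internal
+ * \brief Parse resource agent meta-data and add it to a cache
+ *
+ * \param[in,out] mdc           Cache to add meta-data to
+ * \param[in]     rsc           Resource the meta-data belongs to
+ * \param[in]     metadata_str  Meta-data as an XML string
+ *
+ * \return Newly cached meta-data entry on success, otherwise NULL
+ */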
+struct ra_metadata_s *
+controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc,
+ const char *metadata_str)
+{
+ char *key = NULL;
+ const char *reason = NULL;
+ xmlNode *metadata = NULL;
+ xmlNode *match = NULL;
+ struct ra_metadata_s *md = NULL;
+ bool any_private_params = false;
+ bool ocf1_1 = false;
+
+ CRM_CHECK(mdc && rsc && metadata_str, return NULL);
+
+ key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
+ if (!key) {
+ reason = "Invalid resource agent standard or type";
+ goto err;
+ }
+
+ metadata = string2xml(metadata_str);
+ if (!metadata) {
+ reason = "Metadata is not valid XML";
+ goto err;
+ }
+
+ md = calloc(1, sizeof(struct ra_metadata_s));
+ if (md == NULL) {
+ reason = "Could not allocate memory";
+ goto err;
+ }
+
+ if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
+ xmlChar *content = NULL;
+ xmlNode *version_element = first_named_child(metadata, "version");
+
+ if (version_element != NULL) {
+ content = xmlNodeGetContent(version_element);
+ }
+ log_ra_ocf_version(key, (const char *) content);
+ if (content != NULL) {
+ ocf1_1 = (compare_version((const char *) content, "1.1") >= 0);
+ xmlFree(content);
+ }
+ }
+
+ // Check supported actions
+ match = first_named_child(metadata, "actions");
+ for (match = first_named_child(match, "action"); match != NULL;
+ match = crm_next_same_xml(match)) {
+
+ const char *action_name = crm_element_value(match, "name");
+
+ if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT,
+ pcmk__str_none)) {
+ if (ocf1_1) {
+ controld_set_ra_flags(md, key, ra_supports_reload_agent);
+ } else {
+ crm_notice("reload-agent action will not be used with %s "
+ "because it does not support OCF 1.1 or later", key);
+ }
+
+ } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD,
+ pcmk__str_casei)) {
+ controld_set_ra_flags(md, key, ra_supports_legacy_reload);
+ }
+ }
+
+ // Build a parameter list
+ match = first_named_child(metadata, "parameters");
+ for (match = first_named_child(match, "parameter"); match != NULL;
+ match = crm_next_same_xml(match)) {
+
+ const char *param_name = crm_element_value(match, "name");
+
+ if (param_name == NULL) {
+ crm_warn("Metadata for %s:%s:%s has parameter without a name",
+ rsc->standard, rsc->provider, rsc->type);
+ } else {
+ struct ra_param_s *p = ra_param_from_xml(match);
+
+ if (p == NULL) {
+ reason = "Could not allocate memory";
+ goto err;
+ }
+ if (pcmk_is_set(p->rap_flags, ra_param_private)) {
+ any_private_params = true;
+ }
+ md->ra_params = g_list_prepend(md->ra_params, p);
+ }
+ }
+
+ /* Newer resource agents support the "private" parameter attribute to
+ * indicate sensitive parameters. For backward compatibility with older
+ * agents, implicitly treat a few common names as private when the agent
+ * doesn't specify any explicitly.
+ */
+ if (!any_private_params) {
+ for (GList *iter = md->ra_params; iter != NULL; iter = iter->next) {
+ struct ra_param_s *p = iter->data;
+
+ if (pcmk__str_any_of(p->rap_name, "password", "passwd", "user",
+ NULL)) {
+ controld_set_ra_param_flags(p, ra_param_private);
+ }
+ }
+ }
+
+ g_hash_table_replace(mdc, key, md);
+ free_xml(metadata);
+ return md;
+
+err:
+ crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s",
+ rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
+ pcmk__s(rsc->provider, ""), rsc->type, reason);
+ free(key);
+ free_xml(metadata);
+ metadata_free(md);
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Get meta-data for a resource
+ *
+ * \param[in,out] lrm_state Use meta-data cache from this executor connection
+ * \param[in] rsc Resource to get meta-data for
+ * \param[in] source Allowed meta-data sources (bitmask of
+ * enum controld_metadata_source_e values)
+ *
+ * \return Meta-data cache entry for given resource, or NULL if not available
+ */
+struct ra_metadata_s *
+controld_get_rsc_metadata(lrm_state_t *lrm_state, const lrmd_rsc_info_t *rsc,
+ uint32_t source)
+{
+ struct ra_metadata_s *metadata = NULL;
+ char *metadata_str = NULL;
+ char *key = NULL;
+ int rc = pcmk_ok;
+
+ CRM_CHECK((lrm_state != NULL) && (rsc != NULL), return NULL);
+
+ if (pcmk_is_set(source, controld_metadata_from_cache)) {
+ key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
+ if (key != NULL) {
+ metadata = g_hash_table_lookup(lrm_state->metadata_cache, key);
+ free(key);
+ }
+ if (metadata != NULL) {
+ crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ return metadata;
+ }
+ }
+
+ if (!pcmk_is_set(source, controld_metadata_from_agent)) {
+ return NULL;
+ }
+
+ /* For most actions, metadata was cached asynchronously before action
+ * execution (via metadata_complete()).
+ *
+ * However if that failed, and for other actions, retrieve the metadata now
+ * via a local, synchronous, direct execution of the agent.
+ *
+ * This has multiple issues, which is why this is just a fallback: the
+ * executor should execute agents, not the controller; metadata for
+ * Pacemaker Remote nodes should be collected on those nodes, not locally;
+ * the metadata call shouldn't eat into the timeout of the real action being
+ * performed; and the synchronous call blocks the controller (which also
+ * means that if the metadata action tries to contact the controller,
+ * everything will hang until the timeout).
+ */
+ crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
+ rsc->type, &metadata_str, 0);
+ if (rc != pcmk_ok) {
+ crm_warn("Failed to get metadata for %s (%s%s%s:%s): %s",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type, pcmk_strerror(rc));
+ return NULL;
+ }
+
+ metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc,
+ metadata_str);
+ free(metadata_str);
+ return metadata;
+}
diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h
new file mode 100644
index 0000000..12ea327
--- /dev/null
+++ b/daemons/controld/controld_metadata.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2017-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_METADATA_H
+#define CRMD_METADATA_H
+
+#include <stdint.h> // uint32_t
+#include <glib.h> // GList, GHashTable
+#include "controld_lrm.h" // lrm_state_t, lrm_rsc_info_t
+
+/*
+ * @COMPAT pre-OCF-1.1 resource agents
+ *
+ * Pacemaker previously used the "reload" action to reload agent parameters,
+ * but most agents used it to reload the service configuration. Pacemaker also
+ * misused the OCF 1.0 "unique" parameter attribute to indicate reloadability.
+ *
+ * OCF 1.1 created the "reload-agent" action and "reloadable" parameter
+ * attribute for the Pacemaker usage.
+ *
+ * Pacemaker now supports the OCF 1.1 usage. The old usage is now deprecated,
+ * but will be supported if the agent does not claim OCF 1.1 or later
+ * compliance and does not advertise the reload-agent action.
+ */
+enum ra_flags_e {
+ ra_supports_legacy_reload = (1 << 0),
+ ra_supports_reload_agent = (1 << 1),
+};
+
+enum ra_param_flags_e {
+ ra_param_unique = (1 << 0),
+ ra_param_private = (1 << 1),
+ ra_param_reloadable = (1 << 2),
+};
+
+// Allowed sources of resource agent meta-data when requesting it
+enum controld_metadata_source_e {
+ controld_metadata_from_cache = (1 << 0),
+ controld_metadata_from_agent = (1 << 1),
+};
+
+struct ra_param_s {
+ char *rap_name;
+    uint32_t rap_flags; // bitmask of ra_param_flags_e
+};
+
+struct ra_metadata_s {
+ GList *ra_params; // ra_param_s
+ uint32_t ra_flags; // bitmask of ra_flags_e
+};
+
+#define controld_set_ra_flags(ra_md, ra_key, flags_to_set) do { \
+ (ra_md)->ra_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent", ra_key, \
+ (ra_md)->ra_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_set_ra_param_flags(ra_param, flags_to_set) do { \
+ (ra_param)->rap_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent parameter", (ra_param)->rap_name, \
+ (ra_param)->rap_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+GHashTable *metadata_cache_new(void);
+void metadata_cache_free(GHashTable *mdc);
+void metadata_cache_reset(GHashTable *mdc);
+
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
+ const lrmd_rsc_info_t *rsc,
+ const char *metadata_str);
+struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
+ const lrmd_rsc_info_t *rsc,
+ uint32_t source);
+
+static inline const char *
+ra_param_flag2text(enum ra_param_flags_e flag)
+{
+ switch (flag) {
+ case ra_param_reloadable:
+ return "reloadable";
+ case ra_param_unique:
+ return "unique";
+ case ra_param_private:
+ return "private";
+ default:
+ return "unknown";
+ }
+}
+
+#endif
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
new file mode 100644
index 0000000..f24b755
--- /dev/null
+++ b/daemons/controld/controld_remote_ra.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/lrmd.h>
+#include <crm/lrmd_internal.h>
+#include <crm/services.h>
+
+#include <pacemaker-controld.h>
+
+#define REMOTE_LRMD_RA "remote"
+
+/* The max start timeout before cmd retry */
+#define MAX_START_TIMEOUT_MS 10000
+
+#define cmd_set_flags(cmd, flags_to_set) do { \
+ (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Remote command", (cmd)->rsc_id, (cmd)->status, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define cmd_clear_flags(cmd, flags_to_clear) do { \
+ (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Remote command", (cmd)->rsc_id, (cmd)->status, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
+enum remote_cmd_status {
+ cmd_reported_success = (1 << 0),
+ cmd_cancel = (1 << 1),
+};
+
+typedef struct remote_ra_cmd_s {
+ /*! the local node the cmd is issued from */
+ char *owner;
+ /*! the remote node the cmd is executed on */
+ char *rsc_id;
+ /*! the action to execute */
+ char *action;
+ /*! some string the client wants us to give it back */
+ char *userdata;
+ /*! start delay in ms */
+ int start_delay;
+ /*! timer id used for start delay. */
+ int delay_id;
+ /*! timeout in ms for cmd */
+ int timeout;
+ int remaining_timeout;
+ /*! recurring interval in ms */
+ guint interval_ms;
+ /*! interval timer id */
+ int interval_id;
+ int monitor_timeout_id;
+ int takeover_timeout_id;
+ /*! action parameters */
+ lrmd_key_value_t *params;
+ pcmk__action_result_t result;
+ int call_id;
+ time_t start_time;
+ uint32_t status;
+} remote_ra_cmd_t;
+
+#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
+ lrm_state_t *lrm = (lrm_state); \
+ remote_ra_data_t *ra = lrm->remote_ra_data; \
+ ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
+ lrm->node_name, ra->status, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
+ lrm_state_t *lrm = (lrm_state); \
+ remote_ra_data_t *ra = lrm->remote_ra_data; \
+ ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
+ lrm->node_name, ra->status, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
+
+enum remote_status {
+ expect_takeover = (1 << 0),
+ takeover_complete = (1 << 1),
+ remote_active = (1 << 2),
+    /* Maintenance mode is difficult to determine from the controller's
+     * context, so the scheduler signals it back to us with the transition.
+     */
+    remote_in_maint = (1 << 3),
+    /* Similarly for whether we are controlling a guest node or a remote node:
+     * there is already a meta-attribute in the transition, and since the
+     * situation doesn't change over time, we can record the information at
+     * resource start for later use, when the attributes aren't at hand.
+     */
+ controlling_guest = (1 << 4),
+};
+
+typedef struct remote_ra_data_s {
+ crm_trigger_t *work;
+ remote_ra_cmd_t *cur_cmd;
+ GList *cmds;
+ GList *recurring_cmds;
+ uint32_t status;
+} remote_ra_data_t;
+
+static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
+static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
+static GList *fail_all_monitor_cmds(GList * list);
+
+static void
+free_cmd(gpointer user_data)
+{
+ remote_ra_cmd_t *cmd = user_data;
+
+ if (!cmd) {
+ return;
+ }
+ if (cmd->delay_id) {
+ g_source_remove(cmd->delay_id);
+ }
+ if (cmd->interval_id) {
+ g_source_remove(cmd->interval_id);
+ }
+ if (cmd->monitor_timeout_id) {
+ g_source_remove(cmd->monitor_timeout_id);
+ }
+ if (cmd->takeover_timeout_id) {
+ g_source_remove(cmd->takeover_timeout_id);
+ }
+ free(cmd->owner);
+ free(cmd->rsc_id);
+ free(cmd->action);
+ free(cmd->userdata);
+ pcmk__reset_result(&(cmd->result));
+ lrmd_key_value_freeall(cmd->params);
+ free(cmd);
+}
+
+static int
+generate_callid(void)
+{
+ static int remote_ra_callid = 0;
+
+ remote_ra_callid++;
+ if (remote_ra_callid <= 0) {
+ remote_ra_callid = 1;
+ }
+
+ return remote_ra_callid;
+}
+
+static gboolean
+recurring_helper(gpointer data)
+{
+ remote_ra_cmd_t *cmd = data;
+ lrm_state_t *connection_rsc = NULL;
+
+ cmd->interval_id = 0;
+ connection_rsc = lrm_state_find(cmd->rsc_id);
+ if (connection_rsc && connection_rsc->remote_ra_data) {
+ remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
+
+ ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
+
+ ra_data->cmds = g_list_append(ra_data->cmds, cmd);
+ mainloop_set_trigger(ra_data->work);
+ }
+ return FALSE;
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+ remote_ra_cmd_t *cmd = data;
+ lrm_state_t *connection_rsc = NULL;
+
+ cmd->delay_id = 0;
+ connection_rsc = lrm_state_find(cmd->rsc_id);
+ if (connection_rsc && connection_rsc->remote_ra_data) {
+ remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
+
+ mainloop_set_trigger(ra_data->work);
+ }
+ return FALSE;
+}
+
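+/*!
+ * \internal
+ * \brief Check whether a lost Pacemaker Remote node's attributes should be
+ *        purged, based on the uptime of its connection resource
+ *
+ * \param[in] node  Lost Pacemaker Remote node
+ *
+ * \return true if the node's attributes should be purged, otherwise false
+ */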
+static bool
+should_purge_attributes(crm_node_t *node)
+{
+ bool purge = true;
+ crm_node_t *conn_node = NULL;
+ lrm_state_t *connection_rsc = NULL;
+
+ if (!node->conn_host) {
+ return purge;
+ }
+
+ /* Get the node that was hosting the remote connection resource from the
+ * peer cache. That's the one we really care about here.
+ */
+ conn_node = crm_get_peer(0, node->conn_host);
+ if (conn_node == NULL) {
+ return purge;
+ }
+
+ /* Check the uptime of connection_rsc. If it hasn't been running long
+ * enough, set purge=true. "Long enough" means it started running earlier
+ * than the timestamp when we noticed it went away in the first place.
+ */
+ connection_rsc = lrm_state_find(node->uname);
+
+ if (connection_rsc != NULL) {
+ lrmd_t *lrm = connection_rsc->conn;
+ time_t uptime = lrmd__uptime(lrm);
+ time_t now = time(NULL);
+
+ /* Add 20s of fuzziness to give corosync a while to notice the remote
+ * host is gone. On various error conditions (failure to get uptime,
+ * peer_lost isn't set) we default to purging.
+ */
+ if (uptime > 0 &&
+ conn_node->peer_lost > 0 &&
+ uptime + 20 >= now - conn_node->peer_lost) {
+ purge = false;
+ }
+ }
+
+ return purge;
+}
+
+static enum controld_section_e
+section_to_delete(bool purge)
+{
+ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ if (purge) {
+ return controld_section_all_unlocked;
+ } else {
+ return controld_section_lrm_unlocked;
+ }
+ } else {
+ if (purge) {
+ return controld_section_all;
+ } else {
+ return controld_section_lrm;
+ }
+ }
+}
+
+static void
+purge_remote_node_attrs(int call_opt, crm_node_t *node)
+{
+ bool purge = should_purge_attributes(node);
+ enum controld_section_e section = section_to_delete(purge);
+
+ /* Purge node from attrd's memory */
+ if (purge) {
+ update_attrd_remote_node_removed(node->uname, NULL);
+ }
+
+ controld_delete_node_state(node->uname, section, call_opt);
+}
+
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node joining
+ *
+ * \param[in] node_name Name of newly integrated pacemaker_remote node
+ */
+static void
+remote_node_up(const char *node_name)
+{
+ int call_opt;
+ xmlNode *update, *state;
+ crm_node_t *node;
+
+ CRM_CHECK(node_name != NULL, return);
+ crm_info("Announcing Pacemaker Remote node %s", node_name);
+
+ call_opt = crmd_cib_smart_opt();
+
+ /* Delete node's probe_complete attribute. This serves two purposes:
+ *
+ * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
+ * - deleting it (or any attribute for that matter) here ensures the
+ * attribute manager learns the node is remote
+ */
+ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
+
+ /* Ensure node is in the remote peer cache with member status */
+ node = crm_remote_peer_get(node_name);
+ CRM_CHECK(node != NULL, return);
+
+ purge_remote_node_attrs(call_opt, node);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
+
+ /* pacemaker_remote nodes don't participate in the membership layer,
+ * so cluster nodes don't automatically get notified when they come and go.
+ * We send a cluster message to the DC, and update the CIB node state entry,
+ * so the DC will get it sooner (via message) or later (via CIB refresh),
+ * and any other interested parties can query the CIB.
+ */
+ broadcast_remote_state_message(node_name, true);
+
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ state = create_node_state_update(node, node_update_cluster, update,
+ __func__);
+
+ /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
+ * needs to be fenced, this flag will allow various actions to determine
+ * whether the fencing has happened yet.
+ */
+ crm_xml_add(state, XML_NODE_IS_FENCED, "0");
+
+ /* TODO: If the remote connection drops, and this (async) CIB update either
+ * failed or has not yet completed, later actions could mistakenly think the
+ * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
+ * previously set, because it won't have been cleared). This could prevent
+ * actual fencing or allow recurring monitor failures to be cleared too
+ * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
+ */
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
+ free_xml(update);
+}
+
+enum down_opts {
+ DOWN_KEEP_LRM,
+ DOWN_ERASE_LRM
+};
+
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node leaving
+ *
+ * \param[in] node_name Name of lost node
+ * \param[in] opts Whether to keep or erase LRM history
+ */
+static void
+remote_node_down(const char *node_name, const enum down_opts opts)
+{
+ xmlNode *update;
+ int call_opt = crmd_cib_smart_opt();
+ crm_node_t *node;
+
+ /* Purge node from attrd's memory */
+ update_attrd_remote_node_removed(node_name, NULL);
+
+ /* Normally, only node attributes should be erased, and the resource history
+ * should be kept until the node comes back up. However, after a successful
+ * fence, we want to clear the history as well, so we don't think resources
+ * are still running on the node.
+ */
+ if (opts == DOWN_ERASE_LRM) {
+ controld_delete_node_state(node_name, controld_section_all, call_opt);
+ } else {
+ controld_delete_node_state(node_name, controld_section_attrs, call_opt);
+ }
+
+ /* Ensure node is in the remote peer cache with lost state */
+ node = crm_remote_peer_get(node_name);
+ CRM_CHECK(node != NULL, return);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
+
+ /* Notify DC */
+ broadcast_remote_state_message(node_name, false);
+
+ /* Update CIB node state */
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ create_node_state_update(node, node_update_cluster, update, __func__);
+ controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
+ free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Handle effects of a remote RA command on node state
+ *
+ * \param[in] cmd Completed remote RA command
+ */
+static void
+check_remote_node_state(const remote_ra_cmd_t *cmd)
+{
+ /* Only successful actions can change node state */
+ if (!pcmk__result_ok(&(cmd->result))) {
+ return;
+ }
+
+ if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ remote_node_up(cmd->rsc_id);
+
+ } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
+ /* After a successful migration, we don't need to do remote_node_up()
+ * because the DC already knows the node is up, and we don't want to
+ * clear LRM history etc. We do need to add the remote node to this
+ * host's remote peer cache, because (unless it happens to be DC)
+ * it hasn't been tracking the remote node, and other code relies on
+ * the cache to distinguish remote nodes from unseen cluster nodes.
+ */
+ crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
+
+ CRM_CHECK(node != NULL, return);
+ pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
+ remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
+
+ if (ra_data) {
+ if (!pcmk_is_set(ra_data->status, takeover_complete)) {
+ /* Stop means down if we didn't successfully migrate elsewhere */
+ remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
+ } else if (AM_I_DC == FALSE) {
+ /* Only the connection host and DC track node state,
+ * so if the connection migrated elsewhere and we aren't DC,
+ * un-cache the node, so we don't have stale info
+ */
+ crm_remote_peer_cache_remove(cmd->rsc_id);
+ }
+ }
+ }
+
+ /* We don't do anything for successful monitors, which is correct for
+ * routine recurring monitors, and for monitors on nodes where the
+ * connection isn't supposed to be (the cluster will stop the connection in
+ * that case). However, if the initial probe finds the connection already
+ * active on the node where we want it, we probably should do
+ * remote_node_up(). Unfortunately, we can't distinguish that case here.
+ * Given that connections have to be initiated by the cluster, the chance of
+ * that should be close to zero.
+ */
+}
+
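+/*!
+ * \internal
+ * \brief Report a remote connection command's result as an executor event
+ *
+ * \param[in,out] cmd  Completed remote RA command
+ */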
+static void
+report_remote_ra_result(remote_ra_cmd_t * cmd)
+{
+ lrmd_event_data_t op = { 0, };
+
+ check_remote_node_state(cmd);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = cmd->rsc_id;
+ op.op_type = cmd->action;
+ op.user_data = cmd->userdata;
+ op.timeout = cmd->timeout;
+ op.interval_ms = cmd->interval_ms;
+ op.t_run = (unsigned int) cmd->start_time;
+ op.t_rcchange = (unsigned int) cmd->start_time;
+
+ lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
+ cmd->result.exit_reason);
+
+    if (pcmk_is_set(cmd->status, cmd_reported_success)
+        && !pcmk__result_ok(&(cmd->result))) {
+
+        op.t_rcchange = (unsigned int) time(NULL);
+
+        /* This edge case should be all but impossible, but if it does occur,
+         * the result is that a failure will not be processed correctly. It
+         * is only remotely possible because we can detect that a connection
+         * resource's TCP connection has failed at any moment after start
+         * completes; the actual recurring operation is just a connectivity
+         * ping.
+         *
+         * Basically, we are not guaranteed that the first successful monitor
+         * and a subsequent failed monitor won't occur with the same
+         * timestamp, but we have to make the operations look like they
+         * occurred at separate times. */
+        if (op.t_rcchange == op.t_run) {
+            op.t_rcchange++;
+        }
+    }
+
+ if (cmd->params) {
+ lrmd_key_value_t *tmp;
+
+ op.params = pcmk__strkey_table(free, free);
+ for (tmp = cmd->params; tmp; tmp = tmp->next) {
+ g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
+ }
+
+ }
+ op.call_id = cmd->call_id;
+ op.remote_nodename = cmd->owner;
+
+ lrm_op_callback(&op);
+
+ if (op.params) {
+ g_hash_table_destroy(op.params);
+ }
+ lrmd__reset_result(&op);
+}
+
+static void
+update_remaining_timeout(remote_ra_cmd_t * cmd)
+{
+ cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
+}
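+
+/* Worked example (illustrative values only): with cmd->timeout == 20000 ms
+ * and cmd->start_time seven seconds in the past,
+ *
+ *     remaining_timeout = ((20000 / 1000) - 7) * 1000 = 13000 ms
+ *
+ * Note that the division truncates to whole seconds before scaling back up.
+ */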
+
+static gboolean
+retry_start_cmd_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = data;
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ remote_ra_cmd_t *cmd = NULL;
+ int rc = ETIME;
+
+ if (!ra_data || !ra_data->cur_cmd) {
+ return FALSE;
+ }
+ cmd = ra_data->cur_cmd;
+ if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
+ return FALSE;
+ }
+ update_remaining_timeout(cmd);
+
+ if (cmd->remaining_timeout > 0) {
+ rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
+ } else {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Not enough time remains to retry remote connection");
+ }
+
+ if (rc != pcmk_rc_ok) {
+ report_remote_ra_result(cmd);
+
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ ra_data->cur_cmd = NULL;
+ free_cmd(cmd);
+ } else {
+ /* wait for connection event */
+ }
+
+ return FALSE;
+}
+
+static gboolean
+connection_takeover_timeout_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_cmd_t *cmd = data;
+
+ crm_info("takeover event timed out for node %s", cmd->rsc_id);
+ cmd->takeover_timeout_id = 0;
+
+ lrm_state = lrm_state_find(cmd->rsc_id);
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ free_cmd(cmd);
+
+ return FALSE;
+}
+
+static gboolean
+monitor_timeout_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_cmd_t *cmd = data;
+
+ lrm_state = lrm_state_find(cmd->rsc_id);
+
+ crm_info("Timed out waiting for remote poke response from %s%s",
+ cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
+ cmd->monitor_timeout_id = 0;
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
+ "Remote executor did not respond");
+
+ if (lrm_state && lrm_state->remote_ra_data) {
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ if (ra_data->cur_cmd == cmd) {
+ ra_data->cur_cmd = NULL;
+ }
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ }
+
+ report_remote_ra_result(cmd);
+ free_cmd(cmd);
+
+ if(lrm_state) {
+ lrm_state_disconnect(lrm_state);
+ }
+ return FALSE;
+}
+
+static void
+synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
+{
+ lrmd_event_data_t op = { 0, };
+
+ if (lrm_state == NULL) {
+        /* If no lrm_state was given, assume local */
+ lrm_state = lrm_state_find(controld_globals.our_nodename);
+ }
+ CRM_ASSERT(lrm_state != NULL);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = rsc_id;
+ op.op_type = op_type;
+ op.t_run = (unsigned int) time(NULL);
+ op.t_rcchange = op.t_run;
+ op.call_id = generate_callid();
+ lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ process_lrm_event(lrm_state, &op, NULL, NULL);
+}
+
+void
+remote_lrm_op_callback(lrmd_event_data_t * op)
+{
+ gboolean cmd_handled = FALSE;
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_data_t *ra_data = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+
+ crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
+ "(%d) status=%s (%d)",
+ (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
+ lrmd_event_type2str(op->type), op->remote_nodename,
+ services_ocf_exitcode_str(op->rc), op->rc,
+ pcmk_exec_status_str(op->op_status), op->op_status);
+
+ lrm_state = lrm_state_find(op->remote_nodename);
+ if (!lrm_state || !lrm_state->remote_ra_data) {
+ crm_debug("No state information found for remote connection event");
+ return;
+ }
+ ra_data = lrm_state->remote_ra_data;
+
+ if (op->type == lrmd_event_new_client) {
+ // Another client has connected to the remote daemon
+
+ if (pcmk_is_set(ra_data->status, expect_takeover)) {
+ // Great, we knew this was coming
+ lrm_remote_clear_flags(lrm_state, expect_takeover);
+ lrm_remote_set_flags(lrm_state, takeover_complete);
+
+ } else {
+ crm_err("Disconnecting from Pacemaker Remote node %s due to "
+ "unexpected client takeover", op->remote_nodename);
+            /* In this case, lrmd_tls_connection_destroy() will be called
+             * under the control of the mainloop. Do not free lrm_state->conn
+             * yet; it will be freed in the following stop action. */
+ lrm_state_disconnect_only(lrm_state);
+ }
+ return;
+ }
+
+    /* Pass all execution-complete events up to the normal callback */
+ if (op->type == lrmd_event_exec_complete) {
+ if (pcmk_is_set(ra_data->status, takeover_complete)) {
+ crm_debug("ignoring event, this connection is taken over by another node");
+ } else {
+ lrm_op_callback(op);
+ }
+ return;
+ }
+
+ if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
+
+ if (!pcmk_is_set(ra_data->status, remote_active)) {
+ crm_debug("Disconnection from Pacemaker Remote node %s complete",
+ lrm_state->node_name);
+
+ } else if (!remote_ra_is_in_maintenance(lrm_state)) {
+ crm_err("Lost connection to Pacemaker Remote node %s",
+ lrm_state->node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+
+ } else {
+ crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+ lrm_state->node_name);
+ /* Do roughly what a 'stop' on the remote-resource would do */
+ handle_remote_ra_stop(lrm_state, NULL);
+ remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+ /* now fake the reply of a successful 'stop' */
+ synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
+ }
+ return;
+ }
+
+ if (!ra_data->cur_cmd) {
+ crm_debug("no event to match");
+ return;
+ }
+
+ cmd = ra_data->cur_cmd;
+
+    /* Start and migrate_from actions complete after the connection
+     * comes back to us. */
+    if ((op->type == lrmd_event_connect)
+        && pcmk__strcase_any_of(cmd->action, "start", "migrate_from",
+                                NULL)) {
+ if (op->connection_rc < 0) {
+ update_remaining_timeout(cmd);
+
+ if ((op->connection_rc == -ENOKEY)
+ || (op->connection_rc == -EKEYREJECTED)) {
+ // Hard error, don't retry
+ pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
+ PCMK_EXEC_ERROR,
+ pcmk_strerror(op->connection_rc));
+
+ } else if (cmd->remaining_timeout > 3000) {
+ crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
+ g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
+ return;
+
+ } else {
+ crm_trace("can't reschedule start, remaining timeout too small %d",
+ cmd->remaining_timeout);
+ pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "%s without enough time to retry",
+ pcmk_strerror(op->connection_rc));
+ }
+
+ } else {
+ lrm_state_reset_tables(lrm_state, TRUE);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ lrm_remote_set_flags(lrm_state, remote_active);
+ }
+
+ crm_debug("Remote connection event matched %s action", cmd->action);
+ report_remote_ra_result(cmd);
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+
+ if (cmd->monitor_timeout_id) {
+ g_source_remove(cmd->monitor_timeout_id);
+ cmd->monitor_timeout_id = 0;
+ }
+
+        /* Only report success the first time; after that, only worry about
+         * failures. For this function, getting the poke back is always a
+         * success. Pokes fail only if the send fails or the response times
+         * out. */
+ if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ cmd_set_flags(cmd, cmd_reported_success);
+ }
+
+ crm_debug("Remote poke event matched %s action", cmd->action);
+
+ /* success, keep rescheduling if interval is present. */
+ if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
+ ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
+ cmd->interval_id = g_timeout_add(cmd->interval_ms,
+ recurring_helper, cmd);
+ cmd = NULL; /* prevent free */
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ if (pcmk_is_set(ra_data->status, remote_active) &&
+ !pcmk_is_set(cmd->status, cmd_cancel)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR,
+ "Remote connection unexpectedly dropped "
+ "during monitor");
+ report_remote_ra_result(cmd);
+ crm_err("Remote connection to %s unexpectedly dropped during monitor",
+ lrm_state->node_name);
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ cmd_handled = TRUE;
+
+ } else {
+ crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
+ }
+
+ if (cmd_handled) {
+ ra_data->cur_cmd = NULL;
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ free_cmd(cmd);
+ }
+}
+
+static void
+handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
+{
+ remote_ra_data_t *ra_data = NULL;
+
+ CRM_ASSERT(lrm_state);
+ ra_data = lrm_state->remote_ra_data;
+
+ if (!pcmk_is_set(ra_data->status, takeover_complete)) {
+        /* Delete pending ops whenever the remote connection is intentionally
+         * stopped */
+ g_hash_table_remove_all(lrm_state->active_ops);
+ } else {
+        /* We no longer hold the history if this connection has been
+         * migrated; however, we keep the metadata cache for future use */
+ lrm_state_reset_tables(lrm_state, FALSE);
+ }
+
+ lrm_remote_clear_flags(lrm_state, remote_active);
+ lrm_state_disconnect(lrm_state);
+
+ if (ra_data->cmds) {
+ g_list_free_full(ra_data->cmds, free_cmd);
+ }
+ if (ra_data->recurring_cmds) {
+ g_list_free_full(ra_data->recurring_cmds, free_cmd);
+ }
+ ra_data->cmds = NULL;
+ ra_data->recurring_cmds = NULL;
+ ra_data->cur_cmd = NULL;
+
+ if (cmd) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ }
+}
+
+// \return Standard Pacemaker return code
+static int
+handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
+{
+ const char *server = NULL;
+ lrmd_key_value_t *tmp = NULL;
+ int port = 0;
+ int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
+ int rc = pcmk_rc_ok;
+
+ for (tmp = cmd->params; tmp; tmp = tmp->next) {
+ if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
+ XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
+ server = tmp->value;
+ } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
+ port = atoi(tmp->value);
+ } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
+ lrm_remote_set_flags(lrm_state, controlling_guest);
+ }
+ }
+
+ rc = controld_connect_remote_executor(lrm_state, server, port,
+ timeout_used);
+ if (rc != pcmk_rc_ok) {
+ pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR,
+ "Could not connect to Pacemaker Remote node %s: %s",
+ lrm_state->node_name, pcmk_rc_str(rc));
+ }
+ return rc;
+}
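+
+/* Illustrative parameters (hypothetical values) recognized above: the
+ * connection resource's "addr" (or legacy "server") and "port" attributes,
+ * for example addr="192.0.2.10" port="3121", select the remote executor
+ * endpoint, while a CRM_meta_container entry marks this as a guest node.
+ */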
+
+static gboolean
+handle_remote_ra_exec(gpointer user_data)
+{
+ int rc = 0;
+ lrm_state_t *lrm_state = user_data;
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ remote_ra_cmd_t *cmd;
+ GList *first = NULL;
+
+ if (ra_data->cur_cmd) {
+ /* still waiting on previous cmd */
+ return TRUE;
+ }
+
+ while (ra_data->cmds) {
+ first = ra_data->cmds;
+ cmd = first->data;
+ if (cmd->delay_id) {
+ /* still waiting for start delay timer to trip */
+ return TRUE;
+ }
+
+ ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
+ g_list_free_1(first);
+
+ if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
+ lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
+ if (handle_remote_ra_start(lrm_state, cmd,
+ cmd->timeout) == pcmk_rc_ok) {
+ /* take care of this later when we get async connection result */
+ crm_debug("Initiated async remote connection, %s action will complete after connect event",
+ cmd->action);
+ ra_data->cur_cmd = cmd;
+ return TRUE;
+ }
+ report_remote_ra_result(cmd);
+
+ } else if (!strcmp(cmd->action, "monitor")) {
+
+ if (lrm_state_is_connected(lrm_state) == TRUE) {
+ rc = lrm_state_poke_connection(lrm_state);
+ if (rc < 0) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, pcmk_strerror(rc));
+ }
+ } else {
+ rc = -1;
+ pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
+ PCMK_EXEC_DONE, "Remote connection inactive");
+ }
+
+ if (rc == 0) {
+ crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
+ cmd->rsc_id);
+ ra_data->cur_cmd = cmd;
+ cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
+ return TRUE;
+ }
+ report_remote_ra_result(cmd);
+
+ } else if (!strcmp(cmd->action, "stop")) {
+
+ if (pcmk_is_set(ra_data->status, expect_takeover)) {
+                /* Briefly wait on stop for the takeover event to occur. If
+                 * the takeover event does not occur during the wait period,
+                 * that's fine; it just means that the remote node's
+                 * lrm_status section will be cleared, requiring all resources
+                 * running on the remote node to be explicitly re-detected via
+                 * probe actions. If the takeover does occur successfully, we
+                 * can leave the status section intact. */
+ cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
+ ra_data->cur_cmd = cmd;
+ return TRUE;
+ }
+
+ handle_remote_ra_stop(lrm_state, cmd);
+
+ } else if (!strcmp(cmd->action, "migrate_to")) {
+ lrm_remote_clear_flags(lrm_state, takeover_complete);
+ lrm_remote_set_flags(lrm_state, expect_takeover);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
+ CRMD_ACTION_RELOAD_AGENT, NULL)) {
+ /* Currently the only reloadable parameter is reconnect_interval,
+ * which is only used by the scheduler via the CIB, so reloads are a
+ * no-op.
+ *
+ * @COMPAT DC <2.1.0: We only need to check for "reload" in case
+ * we're in a rolling upgrade with a DC scheduling "reload" instead
+ * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
+ * so this would work for that purpose as well.
+ */
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ }
+
+ free_cmd(cmd);
+ }
+
+ return TRUE;
+}
+
+static void
+remote_ra_data_init(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = NULL;
+
+ if (lrm_state->remote_ra_data) {
+ return;
+ }
+
+ ra_data = calloc(1, sizeof(remote_ra_data_t));
+ ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
+ lrm_state->remote_ra_data = ra_data;
+}
+
+void
+remote_ra_cleanup(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ if (!ra_data) {
+ return;
+ }
+
+ if (ra_data->cmds) {
+ g_list_free_full(ra_data->cmds, free_cmd);
+ }
+
+ if (ra_data->recurring_cmds) {
+ g_list_free_full(ra_data->recurring_cmds, free_cmd);
+ }
+ mainloop_destroy_trigger(ra_data->work);
+ free(ra_data);
+ lrm_state->remote_ra_data = NULL;
+}
+
+gboolean
+is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
+{
+ if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
+ return TRUE;
+ }
+ if ((id != NULL) && (lrm_state_find(id) != NULL)
+ && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
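+
+/* Illustrative checks (hypothetical arguments):
+ *
+ *     is_remote_lrmd_ra(REMOTE_LRMD_RA, "pacemaker", NULL); // TRUE
+ *     is_remote_lrmd_ra("Dummy", "heartbeat", NULL);        // FALSE
+ *
+ * Passing only an ID returns TRUE when an lrm_state exists for it and it
+ * is not the local node's name.
+ */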
+
+lrmd_rsc_info_t *
+remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
+{
+ lrmd_rsc_info_t *info = NULL;
+
+ if ((lrm_state_find(rsc_id))) {
+ info = calloc(1, sizeof(lrmd_rsc_info_t));
+
+ info->id = strdup(rsc_id);
+ info->type = strdup(REMOTE_LRMD_RA);
+ info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
+ info->provider = strdup("pacemaker");
+ }
+
+ return info;
+}
+
+static gboolean
+is_remote_ra_supported_action(const char *action)
+{
+ return pcmk__str_any_of(action,
+ CRMD_ACTION_START,
+ CRMD_ACTION_STOP,
+ CRMD_ACTION_STATUS,
+ CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED,
+ CRMD_ACTION_RELOAD_AGENT,
+ CRMD_ACTION_RELOAD,
+ NULL);
+}
+
+static GList *
+fail_all_monitor_cmds(GList * list)
+{
+ GList *rm_list = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+ GList *gIter = NULL;
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ rm_list = g_list_append(rm_list, cmd);
+ }
+ }
+
+ for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, "Lost connection to remote executor");
+ crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
+ cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
+ report_remote_ra_result(cmd);
+
+ list = g_list_remove(list, cmd);
+ free_cmd(cmd);
+ }
+
+ /* frees only the list data, not the cmds */
+ g_list_free(rm_list);
+ return list;
+}
+
+static GList *
+remove_cmd(GList * list, const char *action, guint interval_ms)
+{
+ remote_ra_cmd_t *cmd = NULL;
+ GList *gIter = NULL;
+
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
+ break;
+ }
+ cmd = NULL;
+ }
+ if (cmd) {
+ list = g_list_remove(list, cmd);
+ free_cmd(cmd);
+ }
+ return list;
+}
+
+int
+remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, guint interval_ms)
+{
+ lrm_state_t *connection_rsc = NULL;
+ remote_ra_data_t *ra_data = NULL;
+
+ connection_rsc = lrm_state_find(rsc_id);
+ if (!connection_rsc || !connection_rsc->remote_ra_data) {
+ return -EINVAL;
+ }
+
+ ra_data = connection_rsc->remote_ra_data;
+ ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
+ ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
+ interval_ms);
+ if (ra_data->cur_cmd &&
+ (ra_data->cur_cmd->interval_ms == interval_ms) &&
+ (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
+
+ cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
+ }
+
+ return 0;
+}
+
+static remote_ra_cmd_t *
+handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
+ const char *userdata)
+{
+ GList *gIter = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+
+    /* There are three places a potential duplicate monitor operation could
+     * exist:
+     * 1. The recurring_cmds list, where the op is waiting for its next
+     *    interval
+     * 2. The cmds list, where the op is queued to be executed immediately
+     * 3. cur_cmd, which means the monitor op is in flight right now
+     */
+ if (interval_ms == 0) {
+ return NULL;
+ }
+
+ if (ra_data->cur_cmd &&
+ !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
+ (ra_data->cur_cmd->interval_ms == interval_ms) &&
+ pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
+
+ cmd = ra_data->cur_cmd;
+ goto handle_dup;
+ }
+
+ for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ goto handle_dup;
+ }
+ }
+
+ for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
+ cmd = gIter->data;
+ if ((cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ goto handle_dup;
+ }
+ }
+
+ return NULL;
+
+handle_dup:
+
+ crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
+ cmd->rsc_id, "monitor", interval_ms);
+
+ /* update the userdata */
+ if (userdata) {
+ free(cmd->userdata);
+ cmd->userdata = strdup(userdata);
+ }
+
+ /* if we've already reported success, generate a new call id */
+ if (pcmk_is_set(cmd->status, cmd_reported_success)) {
+ cmd->start_time = time(NULL);
+ cmd->call_id = generate_callid();
+ cmd_clear_flags(cmd, cmd_reported_success);
+ }
+
+    /* If we have an interval_id set, we are in the process of waiting for
+     * this cmd's next interval. Instead of waiting, cancel the timer and
+     * execute the action immediately */
+ if (cmd->interval_id) {
+ g_source_remove(cmd->interval_id);
+ cmd->interval_id = 0;
+ recurring_helper(cmd);
+ }
+
+ return cmd;
+}
+
+/*!
+ * \internal
+ * \brief Execute an action using the (internal) ocf:pacemaker:remote agent
+ *
+ * \param[in] lrm_state Executor state object for remote connection
+ * \param[in] rsc_id Connection resource ID
+ * \param[in] action Action to execute
+ * \param[in] userdata String to copy and pass to execution callback
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in] timeout_ms Action timeout (in milliseconds)
+ * \param[in] start_delay_ms Delay (in milliseconds) before executing action
+ * \param[in,out] params Connection resource parameters
+ * \param[out] call_id Where to store call ID on success
+ *
+ * \return Standard Pacemaker return code
+ * \note This takes ownership of \p params, which should not be used or freed
+ * after calling this function.
+ */
+int
+controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
+ const char *action, const char *userdata,
+ guint interval_ms, int timeout_ms,
+ int start_delay_ms, lrmd_key_value_t *params,
+ int *call_id)
+{
+ lrm_state_t *connection_rsc = NULL;
+ remote_ra_cmd_t *cmd = NULL;
+ remote_ra_data_t *ra_data = NULL;
+
+    /* Validate arguments (including call_id) before dereferencing call_id */
+    CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
+              && (userdata != NULL) && (call_id != NULL),
+              lrmd_key_value_freeall(params); return EINVAL);
+
+    *call_id = 0;
+
+ if (!is_remote_ra_supported_action(action)) {
+ lrmd_key_value_freeall(params);
+ return EOPNOTSUPP;
+ }
+
+ connection_rsc = lrm_state_find(rsc_id);
+ if (connection_rsc == NULL) {
+ lrmd_key_value_freeall(params);
+ return ENOTCONN;
+ }
+
+ remote_ra_data_init(connection_rsc);
+ ra_data = connection_rsc->remote_ra_data;
+
+ cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
+ if (cmd) {
+ *call_id = cmd->call_id;
+ lrmd_key_value_freeall(params);
+ return pcmk_rc_ok;
+ }
+
+ cmd = calloc(1, sizeof(remote_ra_cmd_t));
+ if (cmd == NULL) {
+ lrmd_key_value_freeall(params);
+ return ENOMEM;
+ }
+
+ cmd->owner = strdup(lrm_state->node_name);
+ cmd->rsc_id = strdup(rsc_id);
+ cmd->action = strdup(action);
+ cmd->userdata = strdup(userdata);
+ if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
+ || (cmd->userdata == NULL)) {
+ free_cmd(cmd);
+ lrmd_key_value_freeall(params);
+ return ENOMEM;
+ }
+
+ cmd->interval_ms = interval_ms;
+ cmd->timeout = timeout_ms;
+ cmd->start_delay = start_delay_ms;
+ cmd->params = params;
+ cmd->start_time = time(NULL);
+
+ cmd->call_id = generate_callid();
+
+ if (cmd->start_delay) {
+ cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+ }
+
+ ra_data->cmds = g_list_append(ra_data->cmds, cmd);
+ mainloop_set_trigger(ra_data->work);
+
+ *call_id = cmd->call_id;
+ return pcmk_rc_ok;
+}
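+
+/* Illustrative call (hypothetical caller and values, not part of this file):
+ *
+ *     int rc;
+ *     int call_id = 0;
+ *     lrmd_key_value_t *params = NULL;
+ *
+ *     params = lrmd_key_value_add(params, XML_RSC_ATTR_REMOTE_RA_ADDR,
+ *                                 "192.0.2.10");
+ *     rc = controld_execute_remote_agent(lrm_state, "remote1", "monitor",
+ *                                        "userdata", 30000, 20000, 0,
+ *                                        params, &call_id);
+ *
+ * On success, rc is pcmk_rc_ok and call_id is set; either way, params are
+ * consumed by the call, per the ownership note above.
+ */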
+
+/*!
+ * \internal
+ * \brief Immediately fail all monitors of a remote node, if proxied here
+ *
+ * \param[in] node_name Name of pacemaker_remote node
+ */
+void
+remote_ra_fail(const char *node_name)
+{
+ lrm_state_t *lrm_state = lrm_state_find(node_name);
+
+ if (lrm_state && lrm_state_is_connected(lrm_state)) {
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+ }
+}
+
+/* A guest node fencing implied by host fencing looks like:
+ *
+ * <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
+ * on_node="lxc1" on_node_uuid="lxc1">
+ * <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
+ * CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
+ * <downed>
+ * <node id="lxc1"/>
+ * </downed>
+ * </pseudo_event>
+ */
+#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \
+ "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
+ "/" XML_CIB_TAG_NODE
+
+/*!
+ * \internal
+ * \brief Check a pseudo-action for Pacemaker Remote node side effects
+ *
+ * \param[in,out] xml XML of pseudo-action to check
+ */
+void
+remote_ra_process_pseudo(xmlNode *xml)
+{
+ xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
+
+ if (numXpathResults(search) == 1) {
+ xmlNode *result = getXpathResult(search, 0);
+
+ /* Normally, we handle the necessary side effects of a guest node stop
+ * action when reporting the remote agent's result. However, if the stop
+ * is implied due to fencing, it will be a fencing pseudo-event, and
+ * there won't be a result to report. Handle that case here.
+ *
+ * This will result in a duplicate call to remote_node_down() if the
+ * guest stop was real instead of implied, but that shouldn't hurt.
+ *
+ * There is still one corner case that isn't handled: if a guest node
+ * isn't running any resources when its host is fenced, it will appear
+ * to be cleanly stopped, so there will be no pseudo-fence, and our
+ * peer cache state will be incorrect unless and until the guest is
+ * recovered.
+ */
+ if (result) {
+ const char *remote = ID(result);
+
+ if (remote) {
+ remote_node_down(remote, DOWN_ERASE_LRM);
+ }
+ }
+ }
+ freeXpathObject(search);
+}
+
+static void
+remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
+{
+ xmlNode *update, *state;
+ int call_opt;
+ crm_node_t *node;
+
+ call_opt = crmd_cib_smart_opt();
+ node = crm_remote_peer_get(lrm_state->node_name);
+ CRM_CHECK(node != NULL, return);
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ state = create_node_state_update(node, node_update_none, update,
+ __func__);
+ crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
+ if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt,
+ NULL) == pcmk_rc_ok) {
+ /* TODO: still not 100% sure that async update will succeed ... */
+ if (maintenance) {
+ lrm_remote_set_flags(lrm_state, remote_in_maint);
+ } else {
+ lrm_remote_clear_flags(lrm_state, remote_in_maint);
+ }
+ }
+ free_xml(update);
+}
+
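+/* A maintenance-state update pseudo-event looks roughly like the following
+ * (hypothetical values):
+ *
+ * <pseudo_event id="30" operation="maintenance_nodes"
+ *               operation_key="maintenance_nodes-30">
+ *   <maintenance>
+ *     <node id="remote1" node_in_maintenance="1"/>
+ *   </maintenance>
+ * </pseudo_event>
+ */
+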
+#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
+ "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
+ XML_GRAPH_TAG_MAINTENANCE
+
+/*!
+ * \internal
+ * \brief Check a pseudo-action holding updates for maintenance state
+ *
+ * \param[in,out] xml XML of pseudo-action to check
+ */
+void
+remote_ra_process_maintenance_nodes(xmlNode *xml)
+{
+ xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
+
+ if (numXpathResults(search) == 1) {
+ xmlNode *node;
+ int cnt = 0, cnt_remote = 0;
+
+        for (node = first_named_child(getXpathResult(search, 0),
+                                      XML_CIB_TAG_NODE);
+             node != NULL; node = pcmk__xml_next(node)) {
+ lrm_state_t *lrm_state = lrm_state_find(ID(node));
+
+ cnt++;
+ if (lrm_state && lrm_state->remote_ra_data &&
+ pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
+ int is_maint;
+
+ cnt_remote++;
+ pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
+ &is_maint, 0);
+ remote_ra_maintenance(lrm_state, is_maint);
+ }
+ }
+ crm_trace("Action holds %d nodes (%d remotes found) "
+ "adjusting maintenance-mode", cnt, cnt_remote);
+ }
+ freeXpathObject(search);
+}
+
+gboolean
+remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ return pcmk_is_set(ra_data->status, remote_in_maint);
+}
+
+gboolean
+remote_ra_controlling_guest(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ return pcmk_is_set(ra_data->status, controlling_guest);
+}
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
new file mode 100644
index 0000000..912f9a5
--- /dev/null
+++ b/daemons/controld/controld_schedulerd.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <unistd.h> /* pid_t, sleep, ssize_t */
+
+#include <crm/cib.h>
+#include <crm/cluster.h>
+#include <crm/common/xml.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_schedulerd.h>
+
+#include <pacemaker-controld.h>
+
+static void handle_disconnect(void);
+
+static pcmk_ipc_api_t *schedulerd_api = NULL;
+
+/*!
+ * \internal
+ * \brief Close any scheduler connection and free associated memory
+ */
+void
+controld_shutdown_schedulerd_ipc(void)
+{
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+
+ pcmk_free_ipc_api(schedulerd_api);
+ schedulerd_api = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Save CIB query result to file, raising FSA error
+ *
+ * \param[in] msg Ignored
+ * \param[in] call_id Call ID of CIB query
+ * \param[in] rc Return code of CIB query
+ * \param[in,out] output Result of CIB query
+ * \param[in] user_data Unique identifier for filename
+ *
+ * \note This is intended to be called after a scheduler connection fails.
+ */
+static void
+save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ const char *id = user_data;
+
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ CRM_CHECK(id != NULL, return);
+
+ if (rc == pcmk_ok) {
+ char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
+
+ if (write_xml_file(output, filename, TRUE) < 0) {
+ crm_err("Could not save Cluster Information Base to %s after scheduler crash",
+ filename);
+ } else {
+ crm_notice("Saved Cluster Information Base to %s after scheduler crash",
+ filename);
+ }
+ free(filename);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Respond to scheduler connection failure
+ */
+static void
+handle_disconnect(void)
+{
+ // If we aren't connected to the scheduler, we can't expect a reply
+ controld_expect_sched_reply(NULL);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
+ int rc = pcmk_ok;
+ char *uuid_str = crm_generate_uuid();
+
+ crm_crit("Connection to the scheduler failed "
+ CRM_XS " uuid=%s", uuid_str);
+
+        /* The scheduler died...
+         *
+         * Save the current CIB so that we have a chance of figuring out
+         * what killed it.
+         *
+         * Delay raising the I_ERROR until the query below completes or
+         * 5s is up, whichever comes first.
+         */
+ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
+ NULL, NULL,
+ cib_scope_local);
+ fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
+
+ } else {
+ crm_info("Connection to the scheduler released");
+ }
+
+ controld_clear_fsa_input_flags(R_PE_CONNECTED);
+ controld_trigger_fsa();
+ return;
+}
+
+static void
+handle_reply(pcmk_schedulerd_api_reply_t *reply)
+{
+ const char *msg_ref = NULL;
+
+ if (!AM_I_DC) {
+ return;
+ }
+
+ msg_ref = reply->data.graph.reference;
+
+ if (msg_ref == NULL) {
+ crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
+
+ } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
+ pcmk__str_none)) {
+ ha_msg_input_t fsa_input;
+ xmlNode *crm_data_node;
+
+ controld_stop_sched_timer();
+
+ /* do_te_invoke (which will eventually process the fsa_input we are constructing
+ * here) requires that fsa_input.xml be non-NULL. That will only happen if
+ * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
+ * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
+ * gave us the values we need, we just need to put them into XML.
+ *
+ * The name of the top level element here is irrelevant. Nothing checks it.
+ */
+ fsa_input.msg = create_xml_node(NULL, "dummy-reply");
+ crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref);
+ crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input);
+
+ crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA);
+ add_node_copy(crm_data_node, reply->data.graph.tgraph);
+ register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
+
+ free_xml(fsa_input.msg);
+
+ } else {
+ crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
+ }
+}
+
+static void
+scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
+ crm_exit_t status, void *event_data, void *user_data)
+{
+ pcmk_schedulerd_api_reply_t *reply = event_data;
+
+ switch (event_type) {
+ case pcmk_ipc_event_disconnect:
+ handle_disconnect();
+ break;
+
+ case pcmk_ipc_event_reply:
+ handle_reply(reply);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+new_schedulerd_ipc_connection(void)
+{
+ int rc;
+
+ controld_set_fsa_input_flags(R_PE_REQUIRED);
+
+ if (schedulerd_api == NULL) {
+ rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
+
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+ }
+
+ pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
+
+ rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main);
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+
+ controld_set_fsa_input_flags(R_PE_CONNECTED);
+ return true;
+}
+
+static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
+ xmlNode *output, void *user_data);
+
+/* A_PE_START, A_PE_STOP, O_PE_RESTART */
+void
+do_pe_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (pcmk_is_set(action, A_PE_STOP)) {
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+ }
+ if (pcmk_is_set(action, A_PE_START)
+ && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+
+ if (cur_state == S_STOPPING) {
+ crm_info("Ignoring request to connect to scheduler while shutting down");
+
+ } else if (!new_schedulerd_ipc_connection()) {
+ crm_warn("Could not connect to scheduler");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ }
+}
+
+static int fsa_pe_query = 0;
+static mainloop_timer_t *controld_sched_timer = NULL;
+
+// @TODO Make this a configurable cluster option if there's demand for it
+#define SCHED_TIMEOUT_MS (120000)
+
+/*!
+ * \internal
+ * \brief Handle a timeout waiting for scheduler reply
+ *
+ * \param[in] user_data Ignored
+ *
+ * \return FALSE (indicating that timer should not be restarted)
+ */
+static gboolean
+controld_sched_timeout(gpointer user_data)
+{
+ if (AM_I_DC) {
+ /* If this node is the DC but can't communicate with the scheduler, just
+ * exit (and likely get fenced) so this node doesn't interfere with any
+ * further DC elections.
+ *
+ * @TODO We could try something less drastic first, like disconnecting
+ * and reconnecting to the scheduler, but something is likely going
+ * seriously wrong, so perhaps it's better to just fail as quickly as
+ * possible.
+ */
+ crmd_exit(CRM_EX_FATAL);
+ }
+ return FALSE;
+}
+
+void
+controld_stop_sched_timer(void)
+{
+ if ((controld_sched_timer != NULL)
+ && (controld_globals.fsa_pe_ref != NULL)) {
+ crm_trace("Stopping timer for scheduler reply %s",
+ controld_globals.fsa_pe_ref);
+ }
+ mainloop_timer_stop(controld_sched_timer);
+}
+
+/*!
+ * \internal
+ * \brief Set the scheduler request currently being waited on
+ *
+ * \param[in] ref Request to expect reply to (or NULL for none)
+ *
+ * \note This function takes ownership of \p ref.
+ */
+void
+controld_expect_sched_reply(char *ref)
+{
+ if (ref) {
+ if (controld_sched_timer == NULL) {
+ controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
+ SCHED_TIMEOUT_MS, FALSE,
+ controld_sched_timeout,
+ NULL);
+ }
+ mainloop_timer_start(controld_sched_timer);
+ } else {
+ controld_stop_sched_timer();
+ }
+ free(controld_globals.fsa_pe_ref);
+ controld_globals.fsa_pe_ref = ref;
+}
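+
+/* Illustrative use (hypothetical reference string), showing the ownership
+ * transfer noted above; the caller must not free the string afterward:
+ *
+ *     char *ref = crm_strdup_printf("pe_calc-%s-%d", "node1", 42);
+ *
+ *     controld_expect_sched_reply(ref);   // starts timer, takes ownership
+ *     controld_expect_sched_reply(NULL);  // stops timer, frees prior ref
+ */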
+
+/*!
+ * \internal
+ * \brief Free the scheduler reply timer
+ */
+void
+controld_free_sched_timer(void)
+{
+ if (controld_sched_timer != NULL) {
+ mainloop_timer_del(controld_sched_timer);
+ controld_sched_timer = NULL;
+ }
+}
+
+/* A_PE_INVOKE */
+void
+do_pe_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ if (AM_I_DC == FALSE) {
+ crm_err("Not invoking scheduler because not DC: %s",
+ fsa_action2string(action));
+ return;
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Cannot shut down gracefully without the scheduler");
+ register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
+
+ } else {
+ crm_info("Waiting for the scheduler to connect");
+ crmd_fsa_stall(FALSE);
+ controld_set_fsa_action_flags(A_PE_START);
+ controld_trigger_fsa();
+ }
+ return;
+ }
+
+ if (cur_state != S_POLICY_ENGINE) {
+ crm_notice("Not invoking scheduler because in state %s",
+ fsa_state2string(cur_state));
+ return;
+ }
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
+
+ /* start the join from scratch */
+ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
+ return;
+ }
+
+ fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local);
+
+ crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
+ fsa_state2string(controld_globals.fsa_state));
+
+ controld_expect_sched_reply(NULL);
+ fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
+}
+
+static void
+force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
+{
+ int max = 0;
+ int lpc = 0;
+ const char *xpath_base = NULL;
+ char *xpath_string = NULL;
+ xmlXPathObjectPtr xpathObj = NULL;
+
+ xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG);
+ if (xpath_base == NULL) {
+ crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)");
+ return;
+ }
+
+ xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
+ xpath_base, XML_CIB_TAG_PROPSET,
+ attr_name);
+ xpathObj = xpath_search(xml, xpath_string);
+ max = numXpathResults(xpathObj);
+ free(xpath_string);
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+ crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
+ crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+
+    if (max == 0) {
+ xmlNode *configuration = NULL;
+ xmlNode *crm_config = NULL;
+ xmlNode *cluster_property_set = NULL;
+
+ crm_trace("Creating %s-%s for %s=%s",
+ CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
+
+ configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
+ NULL);
+ if (configuration == NULL) {
+ configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
+ }
+
+ crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
+ NULL);
+ if (crm_config == NULL) {
+ crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
+ }
+
+ cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
+ NULL, NULL);
+ if (cluster_property_set == NULL) {
+ cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
+ crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
+ }
+
+ xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
+
+ crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+ freeXpathObject(xpathObj);
+}
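+
+/* For illustration (hypothetical CIB fragment): forcing have-watchdog=true
+ * on a CIB that lacks the nvpair creates roughly:
+ *
+ * <crm_config>
+ *   <cluster_property_set id="cib-bootstrap-options">
+ *     <nvpair id="cib-bootstrap-options-have-watchdog"
+ *             name="have-watchdog" value="true"/>
+ *   </cluster_property_set>
+ * </crm_config>
+ */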
+
+static void
+do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ char *ref = NULL;
+ pid_t watchdog = pcmk__locate_sbd();
+
+ if (rc != pcmk_ok) {
+ crm_err("Could not retrieve the Cluster Information Base: %s "
+ CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ return;
+
+ } else if (call_id != fsa_pe_query) {
+ crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
+ return;
+
+ } else if (!AM_I_DC
+ || !pcmk_is_set(controld_globals.fsa_input_register,
+ R_PE_CONNECTED)) {
+ crm_debug("No need to invoke the scheduler anymore");
+ return;
+
+ } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
+ crm_debug("Discarding scheduler request in state: %s",
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+
+ /* this callback counts as 1 */
+ } else if (num_cib_op_callbacks() > 1) {
+ crm_debug("Re-asking for the CIB: %d other peer updates still pending",
+ (num_cib_op_callbacks() - 1));
+ sleep(1);
+ controld_set_fsa_action_flags(A_PE_INVOKE);
+ controld_trigger_fsa();
+ return;
+ }
+
+ CRM_LOG_ASSERT(output != NULL);
+
+ /* Refresh the remote node cache and the known node cache when the
+ * scheduler is invoked */
+ pcmk__refresh_node_caches_from_cib(output);
+
+ crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid);
+ pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM,
+ pcmk_is_set(controld_globals.flags,
+ controld_has_quorum));
+
+ force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
+
+ if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
+ && !crm_have_quorum) {
+ crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
+ }
+
+ rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
+
+ if (rc < 0) {
+ crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ } else {
+ CRM_ASSERT(ref != NULL);
+ controld_expect_sched_reply(ref);
+ crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
+ "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref,
+ crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags,
+ controld_has_quorum)));
+ }
+}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
new file mode 100644
index 0000000..d8cfcad
--- /dev/null
+++ b/daemons/controld/controld_te_actions.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+static GHashTable *te_targets = NULL;
+void send_rsc_command(pcmk__graph_action_t *action);
+static void te_update_job_count(pcmk__graph_action_t *action, int offset);
+
+static void
+te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ action->timer = g_timeout_add(action->timeout + graph->network_delay,
+ action_timer_callback, (void *) action);
+ CRM_ASSERT(action->timer != 0);
+}
+
+/*!
+ * \internal
+ * \brief Execute a graph pseudo-action
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] pseudo Pseudo-action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
+{
+ const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
+
+    /* Maintenance-mode updates are sent to all peers, then processed locally */
+ if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *cmd = NULL;
+
+ if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
+ pcmk__str_casei)) {
+ continue;
+ }
+
+ cmd = create_request(task, pseudo->xml, node->uname,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+ send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
+ free_xml(cmd);
+ }
+
+ remote_ra_process_maintenance_nodes(pseudo->xml);
+ } else {
+ /* Check action for Pacemaker Remote node side effects */
+ remote_ra_process_pseudo(pseudo->xml);
+ }
+
+ crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
+ crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
+ te_action_confirmed(pseudo, graph);
+ return pcmk_rc_ok;
+}
+
+static int
+get_target_rc(pcmk__graph_action_t *action)
+{
+ int exit_status;
+
+ pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
+ &exit_status, 0);
+ return exit_status;
+}
+
+/*!
+ * \internal
+ * \brief Execute a cluster action from a transition graph
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] action Cluster action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ char *counter = NULL;
+ xmlNode *cmd = NULL;
+ gboolean is_local = FALSE;
+
+ const char *id = NULL;
+ const char *task = NULL;
+ const char *value = NULL;
+ const char *on_node = NULL;
+ const char *router_node = NULL;
+
+ gboolean rc = TRUE;
+ gboolean no_wait = FALSE;
+
+ id = ID(action->xml);
+ CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
+
+ on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
+
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router_node == NULL) {
+ router_node = on_node;
+ if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
+ const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
+
+ if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
+ router_node = controld_globals.our_nodename;
+ }
+ }
+ }
+
+ if (pcmk__str_eq(router_node, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ is_local = TRUE;
+ }
+
+ value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
+ if (crm_is_true(value)) {
+ no_wait = TRUE;
+ }
+
+ crm_info("Handling controller request '%s' (%s on %s)%s%s",
+ id, task, on_node, (is_local? " locally" : ""),
+ (no_wait? " without waiting" : ""));
+
+ if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
+ /* defer until everything else completes */
+ crm_info("Controller request '%s' is a local shutdown", id);
+ graph->completion_action = pcmk__graph_shutdown;
+ graph->abort_reason = "local shutdown";
+ te_action_confirmed(action, graph);
+ return pcmk_rc_ok;
+
+ } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
+ crm_node_t *peer = crm_get_peer(0, router_node);
+
+ pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
+ }
+
+ cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+
+ counter = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, get_target_rc(action),
+ controld_globals.te_uuid);
+ crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
+
+ rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
+ free(counter);
+ free_xml(cmd);
+
+ if (rc == FALSE) {
+ crm_err("Action %d failed: send", action->id);
+ return ECOMM;
+
+ } else if (no_wait) {
+ te_action_confirmed(action, graph);
+
+ } else {
+ if (action->timeout <= 0) {
+ crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
+ action->id, task, on_node, action->timeout, graph->network_delay);
+ action->timeout = (int) graph->network_delay;
+ }
+ te_start_action_timer(graph, action);
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Synthesize an executor event for a resource action timeout
+ *
+ * \param[in] action Resource action that timed out
+ * \param[in] target_rc Expected result of action that timed out
+ *
+ * Synthesize an executor event for a resource action timeout. (If the executor
+ * gets a timeout while waiting for a resource action to complete, that will be
+ * reported via the usual callback. This timeout means we didn't hear from the
+ * executor itself or the controller that relayed the action to the executor.)
+ *
+ * \return Newly created executor event for result of \p action
+ * \note The caller is responsible for freeing the return value using
+ * lrmd_free_event().
+ */
+static lrmd_event_data_t *
+synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *reason = NULL;
+ char *dynamic_reason = NULL;
+
+ if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
+ reason = "Local executor did not return result in time";
+ } else {
+ const char *router_node = NULL;
+
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router_node == NULL) {
+ router_node = target;
+ }
+ dynamic_reason = crm_strdup_printf("Controller on %s did not return "
+ "result in time", router_node);
+ reason = dynamic_reason;
+ }
+
+ op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
+ PCMK_OCF_UNKNOWN_ERROR, reason);
+ op->call_id = -1;
+ op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, target_rc,
+ controld_globals.te_uuid);
+ free(dynamic_reason);
+ return op;
+}
+
+static void
+controld_record_action_event(pcmk__graph_action_t *action,
+ lrmd_event_data_t *op)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ xmlNode *state = NULL;
+ xmlNode *rsc = NULL;
+ xmlNode *action_rsc = NULL;
+
+ int rc = pcmk_ok;
+
+ const char *rsc_id = NULL;
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+
+ int target_rc = get_target_rc(action);
+
+ action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
+ if (action_rsc == NULL) {
+ return;
+ }
+
+ rsc_id = ID(action_rsc);
+ CRM_CHECK(rsc_id != NULL,
+ crm_log_xml_err(action->xml, "Bad:action"); return);
+
+    /* Update the CIB along these lines:
+     *
+     * <node_state id="hadev">
+     *   <lrm>
+     *     <lrm_resources>
+     *       <lrm_resource id="rsc2" last_op="start" op_code="0"
+     *                     target="hadev"/>
+     */
+
+ state = create_xml_node(NULL, XML_CIB_TAG_STATE);
+
+ crm_xml_add(state, XML_ATTR_ID, target_uuid);
+ crm_xml_add(state, XML_ATTR_UNAME, target);
+
+ rsc = create_xml_node(state, XML_CIB_TAG_LRM);
+ crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
+
+ rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
+ rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
+ crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
+
+ crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
+ crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
+ crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
+
+ pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
+ __func__);
+
+ rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
+ cib_scope_local);
+ fsa_register_cib_callback(rc, NULL, cib_action_updated);
+ free_xml(state);
+
+ crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
+ rc, action->id, task_uuid, target);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
+}
+
+void
+controld_record_action_timeout(pcmk__graph_action_t *action)
+{
+ lrmd_event_data_t *op = NULL;
+
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+
+ int target_rc = get_target_rc(action);
+
+ crm_warn("%s %d: %s on %s timed out",
+ crm_element_name(action->xml), action->id, task_uuid, target);
+
+ op = synthesize_timeout_event(action, target_rc);
+ controld_record_action_event(action, op);
+ lrmd_free_event(op);
+}
+
+/*!
+ * \internal
+ * \brief Execute a resource action from a transition graph
+ *
+ * \param[in,out] graph Transition graph being executed
+ * \param[in,out] action Resource action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+    /* Never overwrite stop actions in the CIB with anything other than
+     * completed results, because writing pending stops makes it look like
+     * the resource is running again.
+     */
+ xmlNode *cmd = NULL;
+ xmlNode *rsc_op = NULL;
+
+ gboolean rc = TRUE;
+ gboolean no_wait = FALSE;
+ gboolean is_local = FALSE;
+
+ char *counter = NULL;
+ const char *task = NULL;
+ const char *value = NULL;
+ const char *on_node = NULL;
+ const char *router_node = NULL;
+ const char *task_uuid = NULL;
+
+ CRM_ASSERT(action != NULL);
+ CRM_ASSERT(action->xml != NULL);
+
+    pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
+    on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+
+    /* Extract the task first so the error message below can include it */
+    rsc_op = action->xml;
+    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
+
+    CRM_CHECK(!pcmk__str_empty(on_node),
+              crm_err("Corrupted command(id=%s) %s: no node",
+                      ID(action->xml), pcmk__s(task, "without task"));
+              return pcmk_rc_node_unknown);
+
+    task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+    router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
+
+    if (router_node == NULL) {
+        router_node = on_node;
+    }
+
+ counter = pcmk__transition_key(controld_globals.transition_graph->id,
+ action->id, get_target_rc(action),
+ controld_globals.te_uuid);
+ crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
+
+ if (pcmk__str_eq(router_node, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ is_local = TRUE;
+ }
+
+ value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
+ if (crm_is_true(value)) {
+ no_wait = TRUE;
+ }
+
+ crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
+ task, task_uuid, (is_local? " locally" : ""), on_node,
+ (no_wait? " without waiting" : ""), action->id);
+
+ cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
+ CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
+
+ if (is_local) {
+ /* shortcut local resource commands */
+ ha_msg_input_t data = {
+ .msg = cmd,
+ .xml = rsc_op,
+ };
+
+ fsa_data_t msg = {
+ .id = 0,
+ .data = &data,
+ .data_type = fsa_dt_ha_msg,
+ .fsa_input = I_NULL,
+ .fsa_cause = C_FSA_INTERNAL,
+ .actions = A_LRM_INVOKE,
+ .origin = __func__,
+ };
+
+ do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
+ I_NULL, &msg);
+
+ } else {
+ rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
+ }
+
+ free(counter);
+ free_xml(cmd);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
+
+ if (rc == FALSE) {
+ crm_err("Action %d failed: send", action->id);
+ return ECOMM;
+
+ } else if (no_wait) {
+ /* Just mark confirmed. Don't bump the job count only to immediately
+ * decrement it.
+ */
+ crm_info("Action %d confirmed - no wait", action->id);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__update_graph(controld_globals.transition_graph, action);
+ trigger_graph();
+
+ } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
+ action->id, task, task_uuid, on_node, action->timeout);
+ } else {
+ if (action->timeout <= 0) {
+ crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
+ action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
+ action->timeout = (int) graph->network_delay;
+ }
+ te_update_job_count(action, 1);
+ te_start_action_timer(graph, action);
+ }
+
+ return pcmk_rc_ok;
+}
+
+struct te_peer_s {
+ char *name;
+ int jobs;
+ int migrate_jobs;
+};
+
+static void
+te_peer_free(gpointer p)
+{
+ struct te_peer_s *peer = p;
+
+ free(peer->name);
+ free(peer);
+}
+
+void
+te_reset_job_counts(void)
+{
+ GHashTableIter iter;
+ struct te_peer_s *peer = NULL;
+
+ if(te_targets == NULL) {
+ te_targets = pcmk__strkey_table(NULL, te_peer_free);
+ }
+
+ g_hash_table_iter_init(&iter, te_targets);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
+ peer->jobs = 0;
+ peer->migrate_jobs = 0;
+ }
+}
+
+static void
+te_update_job_count_on(const char *target, int offset, bool migrate)
+{
+ struct te_peer_s *r = NULL;
+
+ if(target == NULL || te_targets == NULL) {
+ return;
+ }
+
+ r = g_hash_table_lookup(te_targets, target);
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct te_peer_s));
+ r->name = strdup(target);
+ g_hash_table_insert(te_targets, r->name, r);
+ }
+
+ r->jobs += offset;
+ if(migrate) {
+ r->migrate_jobs += offset;
+ }
+ crm_trace("jobs[%s] = %d", target, r->jobs);
+}
+
+static void
+te_update_job_count(pcmk__graph_action_t *action, int offset)
+{
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+
+ if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
+ /* No limit on these */
+ return;
+ }
+
+    /* If the action has a router node, it is being performed on a remote
+     * node. For now, we count all actions occurring on a remote node
+     * against the job list of the cluster node hosting the connection
+     * resource.
+     */
+ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+
+ const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
+ const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
+
+ te_update_job_count_on(t1, offset, TRUE);
+ te_update_job_count_on(t2, offset, TRUE);
+ return;
+ } else if (target == NULL) {
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ }
+
+ te_update_job_count_on(target, offset, FALSE);
+}
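+
+/* Illustrative example: a migrate_to of rsc1 from node1 to node2 with no
+ * router node bumps both jobs and migrate_jobs on node1 and node2; when the
+ * action is later confirmed, the same bookkeeping runs with offset -1.
+ */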
+
+/*!
+ * \internal
+ * \brief Check whether a graph action is allowed to be executed on a node
+ *
+ * \param[in] graph Transition graph being executed
+ * \param[in] action Graph action being executed
+ * \param[in] target Name of node where action should be executed
+ *
+ * \return true if action is allowed, otherwise false
+ */
+static bool
+allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
+ const char *target)
+{
+ int limit = 0;
+ struct te_peer_s *r = NULL;
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+
+ if(target == NULL) {
+ /* No limit on these */
+ return true;
+
+ } else if(te_targets == NULL) {
+ return false;
+ }
+
+ r = g_hash_table_lookup(te_targets, target);
+ limit = throttle_get_job_limit(target);
+
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct te_peer_s));
+ r->name = strdup(target);
+ g_hash_table_insert(te_targets, r->name, r);
+ }
+
+ if(limit <= r->jobs) {
+ crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
+ target, limit, r->jobs, id);
+ return false;
+
+ } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
+ if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
+ crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
+ target, graph->migration_limit, r->migrate_jobs, id);
+ return false;
+ }
+ }
+
+ crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a graph action is allowed to be executed
+ *
+ * \param[in] graph Transition graph being executed
+ * \param[in] action Graph action being executed
+ *
+ * \return true if action is allowed, otherwise false
+ */
+static bool
+graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *target = NULL;
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+
+ if (action->type != pcmk__rsc_graph_action) {
+ /* No limit on these */
+ return true;
+ }
+
+    /* If the action has a router node, it is being performed on a remote
+     * node. For now, we count all actions occurring on a remote node
+     * against the job list of the cluster node hosting the connection
+     * resource.
+     */
+ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+ target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
+ if (!allowed_on_node(graph, action, target)) {
+ return false;
+ }
+
+ target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
+
+ } else if (target == NULL) {
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ }
+
+ return allowed_on_node(graph, action, target);
+}
+
+/*!
+ * \brief Confirm a graph action (and optionally update graph)
+ *
+ * \param[in,out] action Action to confirm
+ * \param[in,out] graph Update and trigger this graph (if non-NULL)
+ */
+void
+te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
+{
+ if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type == pcmk__rsc_graph_action)
+ && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
+ te_update_job_count(action, -1);
+ }
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ }
+ if (graph) {
+ pcmk__update_graph(graph, action);
+ trigger_graph();
+ }
+}
+
+static pcmk__graph_functions_t te_graph_fns = {
+ execute_pseudo_action,
+ execute_rsc_action,
+ execute_cluster_action,
+ controld_execute_fence_action,
+ graph_action_allowed,
+};
+
+/*!
+ * \internal
+ * \brief Register the transitioner's graph functions with \p libpacemaker
+ */
+void
+controld_register_graph_functions(void)
+{
+ pcmk__set_graph_functions(&te_graph_fns);
+}
+
+void
+notify_crmd(pcmk__graph_t *graph)
+{
+ const char *type = "unknown";
+ enum crmd_fsa_input event = I_NULL;
+
+ crm_debug("Processing transition completion in state %s",
+ fsa_state2string(controld_globals.fsa_state));
+
+ CRM_CHECK(graph->complete, graph->complete = true);
+
+ switch (graph->completion_action) {
+ case pcmk__graph_wait:
+ type = "stop";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ event = I_TE_SUCCESS;
+ }
+ break;
+ case pcmk__graph_done:
+ type = "done";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ event = I_TE_SUCCESS;
+ }
+ break;
+
+ case pcmk__graph_restart:
+ type = "restart";
+ if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
+ if (controld_get_period_transition_timer() > 0) {
+ controld_stop_transition_timer();
+ controld_start_transition_timer();
+ } else {
+ event = I_PE_CALC;
+ }
+
+ } else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
+ controld_set_fsa_action_flags(A_PE_INVOKE);
+ controld_trigger_fsa();
+ }
+ break;
+
+ case pcmk__graph_shutdown:
+ type = "shutdown";
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ event = I_STOP;
+
+ } else {
+ crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
+ event = I_TERMINATE;
+ }
+ }
+
+ crm_debug("Transition %d status: %s - %s", graph->id, type,
+ pcmk__s(graph->abort_reason, "unspecified reason"));
+
+ graph->abort_reason = NULL;
+ graph->completion_action = pcmk__graph_done;
+
+ if (event != I_NULL) {
+ register_fsa_input(C_FSA_INTERNAL, event, NULL);
+ } else {
+ controld_trigger_fsa();
+ }
+}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
new file mode 100644
index 0000000..cf9de83
--- /dev/null
+++ b/daemons/controld/controld_te_callbacks.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster.h> /* For ONLINESTATUS etc */
+
+#include <pacemaker-controld.h>
+
+void te_update_confirm(const char *event, xmlNode * msg);
+
+#define RSC_OP_PREFIX "//" XML_TAG_DIFF_ADDED "//" XML_TAG_CIB \
+ "//" XML_LRM_TAG_RSC_OP "[@" XML_ATTR_ID "='"
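+
+/* Assuming the usual values of these schema constants, the prefix expands to
+ * roughly "//diff-added//cib//lrm_rsc_op[@id='"; te_update_diff_v1() appends
+ * an operation ID and a closing "']" to form the full lookup XPath.
+ */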
+
+// An explicit shutdown-lock of 0 means the lock has been cleared
+static bool
+shutdown_lock_cleared(xmlNode *lrm_resource)
+{
+ time_t shutdown_lock = 0;
+
+ return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ &shutdown_lock) == pcmk_ok)
+ && (shutdown_lock == 0);
+}
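+
+/* Illustrative only: a history entry such as
+ *   <lrm_resource id="rsc1" ... shutdown-lock="0"/>
+ * (assuming XML_CONFIG_ATTR_SHUTDOWN_LOCK names the "shutdown-lock"
+ * attribute) satisfies this check; a nonzero epoch or a missing attribute
+ * does not.
+ */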
+
+static void
+te_update_diff_v1(const char *event, xmlNode *diff)
+{
+ int lpc, max;
+ xmlXPathObject *xpathObj = NULL;
+ GString *rsc_op_xpath = NULL;
+
+ CRM_CHECK(diff != NULL, return);
+
+ pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE);
+ controld_globals.logger_out->message(controld_globals.logger_out,
+ "xml-patchset", diff);
+
+ if (cib__config_changed_v1(NULL, NULL, &diff)) {
+ abort_transition(INFINITY, pcmk__graph_restart, "Non-status change",
+ diff);
+ goto bail; /* configuration changed */
+ }
+
+ /* Tickets Attributes - Added/Updated */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute: update", aborted);
+ goto bail;
+
+ }
+ freeXpathObject(xpathObj);
+
+ /* Tickets Attributes - Removed */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute: removal", aborted);
+ goto bail;
+ }
+ freeXpathObject(xpathObj);
+
+ /* Transient Attributes - Removed */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
+ XML_TAG_TRANSIENT_NODEATTRS);
+ if (numXpathResults(xpathObj) > 0) {
+ xmlNode *aborted = getXpathResult(xpathObj, 0);
+
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Transient attribute: removal", aborted);
+ goto bail;
+
+ }
+ freeXpathObject(xpathObj);
+
+ // Check for lrm_resource entries
+ xpathObj = xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT
+ "//" XML_TAG_DIFF_ADDED
+ "//" XML_LRM_TAG_RESOURCE);
+ max = numXpathResults(xpathObj);
+
+ /*
+ * Updates by, or in response to, graph actions will never affect more than
+ * one resource at a time, so such updates indicate an LRM refresh. In that
+ * case, start a new transition rather than check each result individually,
+ * which can result in _huge_ speedups in large clusters.
+ *
+ * Unfortunately, we can only do so when there are no pending actions.
+ * Otherwise, we could mistakenly throw away those results here, and
+ * the cluster will stall waiting for them and time out the operation.
+ */
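+    /* Illustrative example: a single diff adding lrm_resource entries for
+     * dozens of resources at once is treated as a refresh here, so one
+     * scheduler rerun replaces dozens of per-operation checks.
+     */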
+ if ((controld_globals.transition_graph->pending == 0) && (max > 1)) {
+ crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
+ max);
+ crm_log_xml_trace(diff, "lrm-refresh");
+ abort_transition(INFINITY, pcmk__graph_restart, "History refresh",
+ NULL);
+ goto bail;
+ }
+
+ if (max == 1) {
+ xmlNode *lrm_resource = getXpathResult(xpathObj, 0);
+
+ if (shutdown_lock_cleared(lrm_resource)) {
+ // @TODO would be more efficient to abort once after transition done
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Shutdown lock cleared", lrm_resource);
+ // Still process results, so we stop timers and update failcounts
+ }
+ }
+ freeXpathObject(xpathObj);
+
+ /* Process operation updates */
+ xpathObj =
+ xpath_search(diff,
+ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
+ max = numXpathResults(xpathObj);
+ if (max > 0) {
+ int lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
+ const char *node = get_node_id(rsc_op);
+
+ process_graph_event(rsc_op, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+
+    /* Detect deleted (as opposed to replaced or added) actions, e.g. crm_resource -C */
+ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
+ max = numXpathResults(xpathObj);
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *op_id = NULL;
+ xmlXPathObject *op_match = NULL;
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+        if (match == NULL) { continue; }
+
+ op_id = ID(match);
+
+ if (rsc_op_xpath == NULL) {
+ rsc_op_xpath = g_string_new(RSC_OP_PREFIX);
+ } else {
+ g_string_truncate(rsc_op_xpath, sizeof(RSC_OP_PREFIX) - 1);
+ }
+ pcmk__g_strcat(rsc_op_xpath, op_id, "']", NULL);
+
+ op_match = xpath_search(diff, (const char *) rsc_op_xpath->str);
+ if (numXpathResults(op_match) == 0) {
+            /* Prevent false positives by matching cancellations too */
+ const char *node = get_node_id(match);
+ pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node);
+
+ if (cancelled == NULL) {
+ crm_debug("No match for deleted action %s (%s on %s)",
+ (const char *) rsc_op_xpath->str, op_id, node);
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Resource op removal", match);
+ freeXpathObject(op_match);
+ goto bail;
+
+ } else {
+ crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
+ op_id, node, cancelled->id);
+ }
+ }
+
+ freeXpathObject(op_match);
+ }
+
+ bail:
+ freeXpathObject(xpathObj);
+ if (rsc_op_xpath != NULL) {
+ g_string_free(rsc_op_xpath, TRUE);
+ }
+}
+
+static void
+process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
+{
+ for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL;
+ rsc_op = pcmk__xml_next(rsc_op)) {
+ process_graph_event(rsc_op, node);
+ }
+ if (shutdown_lock_cleared(lrm_resource)) {
+ // @TODO would be more efficient to abort once after transition done
+ abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared",
+ lrm_resource);
+ }
+}
+
+static void
+process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
+ const char *op, const char *xpath)
+{
+ xmlNode *rsc = NULL;
+
+ if (xml == NULL) {
+ return;
+ }
+
+ if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) {
+ xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
+ CRM_CHECK(xml != NULL, return);
+ }
+
+ CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return);
+
+ /*
+ * Updates by, or in response to, TE actions will never contain updates
+ * for more than one resource at a time, so such updates indicate an
+ * LRM refresh.
+ *
+ * In that case, start a new transition rather than check each result
+ * individually, which can result in _huge_ speedups in large clusters.
+ *
+ * Unfortunately, we can only do so when there are no pending actions.
+ * Otherwise, we could mistakenly throw away those results here, and
+ * the cluster will stall waiting for them and time out the operation.
+ */
+ if ((controld_globals.transition_graph->pending == 0)
+ && (xml->children != NULL) && (xml->children->next != NULL)) {
+
+ crm_log_xml_trace(change, "lrm-refresh");
+ abort_transition(INFINITY, pcmk__graph_restart, "History refresh",
+ NULL);
+ return;
+ }
+
+ for (rsc = pcmk__xml_first_child(xml); rsc != NULL;
+ rsc = pcmk__xml_next(rsc)) {
+ crm_trace("Processing %s", ID(rsc));
+ process_lrm_resource_diff(rsc, node);
+ }
+}
+
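+/* Illustrative only: given an XPath such as
+ *   .../node_state[@id='3232238280']/lrm[@id='3232238280']/...
+ * this returns a newly allocated copy of "3232238280" (which the caller must
+ * free), or NULL if no node_state ID is present.
+ */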
+static char *
+extract_node_uuid(const char *xpath)
+{
+ char *mutable_path = strdup(xpath);
+ char *node_uuid = NULL;
+ char *search = NULL;
+ char *match = NULL;
+
+ match = strstr(mutable_path, "node_state[@" XML_ATTR_ID "=\'");
+ if (match == NULL) {
+ free(mutable_path);
+ return NULL;
+ }
+ match += strlen("node_state[@" XML_ATTR_ID "=\'");
+
+ search = strchr(match, '\'');
+ if (search == NULL) {
+ free(mutable_path);
+ return NULL;
+ }
+ search[0] = 0;
+
+ node_uuid = strdup(match);
+ free(mutable_path);
+ return node_uuid;
+}
+
+static void
+abort_unless_down(const char *xpath, const char *op, xmlNode *change,
+ const char *reason)
+{
+ char *node_uuid = NULL;
+ pcmk__graph_action_t *down = NULL;
+
+ if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) {
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ return;
+ }
+
+ node_uuid = extract_node_uuid(xpath);
+ if(node_uuid == NULL) {
+ crm_err("Could not extract node ID from %s", xpath);
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ return;
+ }
+
+ down = match_down_event(node_uuid);
+ if (down == NULL) {
+ crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
+ abort_transition(INFINITY, pcmk__graph_restart, reason, change);
+ } else {
+ crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
+ }
+ free(node_uuid);
+}
+
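+/* Illustrative only: for a deletion at an XPath like
+ *   .../lrm_resource[@id='rsc1']/lrm_rsc_op[@id='rsc1_monitor_10000']
+ * the text between the last pair of single quotes, "rsc1_monitor_10000", is
+ * the operation key checked against any pending cancellation below.
+ */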
+static void
+process_op_deletion(const char *xpath, xmlNode *change)
+{
+ char *mutable_key = strdup(xpath);
+ char *key;
+ char *node_uuid;
+
+ // Extract the part of xpath between last pair of single quotes
+ key = strrchr(mutable_key, '\'');
+ if (key != NULL) {
+ *key = '\0';
+ key = strrchr(mutable_key, '\'');
+ }
+ if (key == NULL) {
+ crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
+ xpath);
+ free(mutable_key);
+ return;
+ }
+ ++key;
+
+ node_uuid = extract_node_uuid(xpath);
+ if (confirm_cancel_action(key, node_uuid) == FALSE) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Resource operation removal", change);
+ }
+ free(mutable_key);
+ free(node_uuid);
+}
+
+static void
+process_delete_diff(const char *xpath, const char *op, xmlNode *change)
+{
+ if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
+ process_op_deletion(xpath, change);
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
+ abort_unless_down(xpath, op, change, "Resource state removal");
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
+ abort_unless_down(xpath, op, change, "Node state removal");
+
+ } else {
+ crm_trace("Ignoring delete of %s", xpath);
+ }
+}
+
+static void
+process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
+
+ process_resource_updates(ID(state), lrm, change, op, xpath);
+}
+
+static void
+process_status_diff(xmlNode *status, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ for (xmlNode *state = pcmk__xml_first_child(status); state != NULL;
+ state = pcmk__xml_next(state)) {
+ process_node_state_diff(state, change, op, xpath);
+ }
+}
+
+static void
+process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
+ const char *xpath)
+{
+ xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
+ xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);
+
+ if (status) {
+ process_status_diff(status, change, op, xpath);
+ }
+ if (config) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Non-status-only change", change);
+ }
+}
+
+static void
+te_update_diff_v2(xmlNode *diff)
+{
+ crm_log_xml_trace(diff, "Patch:Raw");
+
+ for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL;
+ change = pcmk__xml_next(change)) {
+
+ xmlNode *match = NULL;
+ const char *name = NULL;
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+
+ // Possible ops: create, modify, delete, move
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+
+ // Ignore uninteresting updates
+ if (op == NULL) {
+ continue;
+
+ } else if (xpath == NULL) {
+ crm_trace("Ignoring %s change for version field", op);
+ continue;
+
+ } else if ((strcmp(op, "move") == 0)
+ && (strstr(xpath,
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
+ "/" XML_CIB_TAG_RESOURCES) == NULL)) {
+ /* We still need to consider moves within the resources section,
+ * since they affect placement order.
+ */
+ crm_trace("Ignoring move change at %s", xpath);
+ continue;
+ }
+
+ // Find the result of create/modify ops
+ if (strcmp(op, "create") == 0) {
+ match = change->children;
+
+ } else if (strcmp(op, "modify") == 0) {
+ match = first_named_child(change, XML_DIFF_RESULT);
+ if(match) {
+ match = match->children;
+ }
+
+ } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) {
+ crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
+ op, xpath);
+ continue;
+ }
+
+ if (match) {
+ if (match->type == XML_COMMENT_NODE) {
+ crm_trace("Ignoring %s operation for comment at %s", op, xpath);
+ continue;
+ }
+ name = (const char *)match->name;
+ }
+
+ crm_trace("Handling %s operation for %s%s%s",
+ op, (xpath? xpath : "CIB"),
+ (name? " matched by " : ""), (name? name : ""));
+
+ if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Configuration change", change);
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
+ || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Ticket attribute change", change);
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
+ || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS,
+ pcmk__str_none)) {
+ abort_unless_down(xpath, op, change, "Transient attribute change");
+ break; // Won't be packaged with operation results we may be waiting for
+
+ } else if (strcmp(op, "delete") == 0) {
+ process_delete_diff(xpath, op, change);
+
+ } else if (name == NULL) {
+ crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
+ op, xpath);
+
+ } else if (strcmp(name, XML_TAG_CIB) == 0) {
+ process_cib_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
+ process_status_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
+ process_node_state_diff(match, change, op, xpath);
+
+ } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
+ process_resource_updates(ID(match), match, change, op, xpath);
+
+ } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_resource_updates(local_node, match, change, op, xpath);
+ free(local_node);
+
+ } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_lrm_resource_diff(match, local_node);
+ free(local_node);
+
+ } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
+ char *local_node = pcmk__xpath_node_id(xpath, "lrm");
+
+ process_graph_event(match, local_node);
+ free(local_node);
+
+ } else {
+ crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
+ op, xpath, name);
+ }
+ }
+}
+
+void
+te_update_diff(const char *event, xmlNode * msg)
+{
+ xmlNode *diff = NULL;
+ const char *op = NULL;
+ int rc = -EINVAL;
+ int format = 1;
+ int p_add[] = { 0, 0, 0 };
+ int p_del[] = { 0, 0, 0 };
+
+ CRM_CHECK(msg != NULL, return);
+ crm_element_value_int(msg, F_CIB_RC, &rc);
+
+ if (controld_globals.transition_graph == NULL) {
+ crm_trace("No graph");
+ return;
+
+ } else if (rc < pcmk_ok) {
+ crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
+ return;
+
+ } else if (controld_globals.transition_graph->complete
+ && (controld_globals.fsa_state != S_IDLE)
+ && (controld_globals.fsa_state != S_TRANSITION_ENGINE)
+ && (controld_globals.fsa_state != S_POLICY_ENGINE)) {
+ crm_trace("Filter state=%s (complete)",
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+ }
+
+ op = crm_element_value(msg, F_CIB_OPERATION);
+ diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ xml_patch_versions(diff, p_add, p_del);
+ crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
+ p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
+ fsa_state2string(controld_globals.fsa_state));
+
+ crm_element_value_int(diff, "format", &format);
+ switch (format) {
+ case 1:
+ te_update_diff_v1(event, diff);
+ break;
+ case 2:
+ te_update_diff_v2(diff);
+ break;
+ default:
+ crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
+ format);
+ }
+ controld_remove_all_outside_events();
+}
+
+void
+process_te_message(xmlNode * msg, xmlNode * xml_data)
+{
+ const char *value = NULL;
+ xmlXPathObject *xpathObj = NULL;
+ int nmatches = 0;
+
+ CRM_CHECK(msg != NULL, return);
+
+ // Transition requests must specify transition engine as subsystem
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ if (pcmk__str_empty(value)
+ || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ crm_info("Received invalid transition request: subsystem '%s' not '"
+ CRM_SYSTEM_TENGINE "'", pcmk__s(value, ""));
+ return;
+ }
+
+ // Only the lrm_invoke command is supported as a transition request
+ value = crm_element_value(msg, F_CRM_TASK);
+ if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) {
+ crm_info("Received invalid transition request: command '%s' not '"
+ CRM_OP_INVOKE_LRM "'", pcmk__s(value, ""));
+ return;
+ }
+
+ // Transition requests must be marked as coming from the executor
+ value = crm_element_value(msg, F_CRM_SYS_FROM);
+ if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+ crm_info("Received invalid transition request: from '%s' not '"
+ CRM_SYSTEM_LRMD "'", pcmk__s(value, ""));
+ return;
+ }
+
+ crm_debug("Processing transition request with ref='%s' origin='%s'",
+ pcmk__s(crm_element_value(msg, F_CRM_REFERENCE), ""),
+ pcmk__s(crm_element_value(msg, F_ORIG), ""));
+
+ xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
+ nmatches = numXpathResults(xpathObj);
+ if (nmatches == 0) {
+ crm_err("Received transition request with no results (bug?)");
+ } else {
+ for (int lpc = 0; lpc < nmatches; lpc++) {
+ xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
+ const char *node = get_node_id(rsc_op);
+
+ process_graph_event(rsc_op, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+}
+
+void
+cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ if (rc < pcmk_ok) {
+ crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
+ }
+}
+
+/*!
+ * \brief Handle a timeout in node-to-node communication
+ *
+ * \param[in,out] data Pointer to graph action
+ *
+ * \return FALSE (indicating that the source should not be re-added)
+ */
+gboolean
+action_timer_callback(gpointer data)
+{
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) data;
+ const char *task = NULL;
+ const char *on_node = NULL;
+ const char *via_node = NULL;
+
+ CRM_CHECK(data != NULL, return FALSE);
+
+ stop_te_timer(action);
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ via_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+ if (controld_globals.transition_graph->complete) {
+ crm_notice("Node %s did not send %s result (via %s) within %dms "
+ "(ignoring because transition not in progress)",
+ (on_node? on_node : ""), (task? task : "unknown action"),
+ (via_node? via_node : "controller"), action->timeout);
+ } else {
+ /* fail the action */
+
+ crm_err("Node %s did not send %s result (via %s) within %dms "
+ "(action timeout plus cluster-delay)",
+ (on_node? on_node : ""), (task? task : "unknown action"),
+ (via_node? via_node : "controller"),
+ (action->timeout
+ + controld_globals.transition_graph->network_delay));
+ pcmk__log_graph_action(LOG_ERR, action);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+
+ te_action_confirmed(action, controld_globals.transition_graph);
+ abort_transition(INFINITY, pcmk__graph_restart, "Action lost", NULL);
+
+ // Record timeout in the CIB if appropriate
+ if ((action->type == pcmk__rsc_graph_action)
+ && controld_action_is_recordable(task)) {
+ controld_record_action_timeout(action);
+ }
+ }
+
+ return FALSE;
+}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
new file mode 100644
index 0000000..d4e2b0f
--- /dev/null
+++ b/daemons/controld/controld_te_events.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#include <crm/common/attrd_internal.h>
+#include <crm/common/ipc_attrd_internal.h>
+
+/*!
+ * \internal
+ * \brief Action numbers of outside events processed in current update diff
+ *
+ * This table is to be used as a set. It should be empty when the transitioner
+ * begins processing a CIB update diff. It ensures that if there are multiple
+ * events (for example, "_last_0" and "_last_failure_0") for the same action,
+ * only one of them updates the failcount. Events that originate outside the
+ * cluster can't be confirmed, since they're not in the transition graph.
+ */
+static GHashTable *outside_events = NULL;
+
+/*!
+ * \internal
+ * \brief Empty the hash table containing action numbers of outside events
+ */
+void
+controld_remove_all_outside_events(void)
+{
+ if (outside_events != NULL) {
+ g_hash_table_remove_all(outside_events);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Destroy the hash table containing action numbers of outside events
+ */
+void
+controld_destroy_outside_events_table(void)
+{
+ if (outside_events != NULL) {
+ g_hash_table_destroy(outside_events);
+ outside_events = NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add an outside event's action number to a set
+ *
+ * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
+ * event was not already in the set, or \p pcmk_rc_already otherwise.
+ */
+static int
+record_outside_event(gint action_num)
+{
+ if (outside_events == NULL) {
+ outside_events = g_hash_table_new(NULL, NULL);
+ }
+
+ if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
+ return pcmk_rc_ok;
+ }
+ return pcmk_rc_already;
+}
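+
+/* Illustrative usage: the first call for a given action number reports a new
+ * event, and any repeat reports a duplicate:
+ *
+ *   record_outside_event(42);   // pcmk_rc_ok: process the event
+ *   record_outside_event(42);   // pcmk_rc_already: skip the duplicate
+ */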
+
+gboolean
+fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
+{
+ const char *target_uuid = NULL;
+ const char *router = NULL;
+ const char *router_uuid = NULL;
+ xmlNode *last_action = NULL;
+
+ GList *gIter = NULL;
+ GList *gIter2 = NULL;
+
+ if (graph == NULL || graph->complete) {
+ return FALSE;
+ }
+
+ gIter = graph->synapses;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
+
+ if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
+ /* We've already been here */
+ continue;
+ }
+
+ gIter2 = synapse->actions;
+ for (; gIter2 != NULL; gIter2 = gIter2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
+
+ if ((action->type == pcmk__pseudo_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ continue;
+ } else if (action->type == pcmk__cluster_graph_action) {
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+
+ if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+ continue;
+ }
+ }
+
+ target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router) {
+ crm_node_t *node = crm_get_peer(0, router);
+ if (node) {
+ router_uuid = node->uuid;
+ }
+ }
+
+ if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
+ last_action = action->xml;
+ stop_te_timer(action);
+ pcmk__update_graph(graph, action);
+
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ crm_notice("Action %d (%s) was pending on %s (offline)",
+ action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
+ } else {
+ crm_info("Action %d (%s) is scheduled for %s (offline)",
+ action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
+ }
+ }
+ }
+ }
+
+ if (last_action != NULL) {
+ crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
+ abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
+ last_action);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Update failure-related node attributes if warranted
+ *
+ * \param[in] event XML describing operation that (maybe) failed
+ * \param[in] event_node_uuid Node that event occurred on
+ * \param[in] rc Actual operation return code
+ * \param[in] target_rc Expected operation return code
+ * \param[in] do_update If TRUE, do update regardless of operation type
+ * \param[in] ignore_failures If TRUE, update last failure but not fail count
+ *
+ * \return TRUE if this was not a direct nack, success or lrm status refresh
+ */
+static gboolean
+update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
+ int target_rc, gboolean do_update, gboolean ignore_failures)
+{
+ guint interval_ms = 0;
+
+ char *task = NULL;
+ char *rsc_id = NULL;
+
+ const char *value = NULL;
+ const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
+ const char *on_uname = crm_peer_uname(event_node_uuid);
+ const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
+
+ // Nothing needs to be done for success or status refresh
+ if (rc == target_rc) {
+ return FALSE;
+ } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
+ crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
+ id, rc, on_uname);
+ return FALSE;
+ }
+
+ /* Sanity check */
+ CRM_CHECK(on_uname != NULL, return TRUE);
+ CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
+ crm_err("Couldn't parse: %s", ID(event)); goto bail);
+
+ /* Decide whether update is necessary and what value to use */
+ if ((interval_ms > 0)
+ || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none)
+ || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) {
+ do_update = TRUE;
+
+ } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) {
+ do_update = TRUE;
+ value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
+ CRM_INFINITY_S);
+
+ } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) {
+ do_update = TRUE;
+ value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
+ CRM_INFINITY_S);
+ }
+
+ if (do_update) {
+ pcmk__attrd_query_pair_t *fail_pair = NULL;
+ pcmk__attrd_query_pair_t *last_pair = NULL;
+ char *fail_name = NULL;
+ char *last_name = NULL;
+ GList *attrs = NULL;
+
+ uint32_t opts = pcmk__node_attr_none;
+
+ char *now = pcmk__ttoa(time(NULL));
+
+ // Fail count will be either incremented or set to infinity
+ if (!pcmk_str_is_infinity(value)) {
+ value = XML_NVPAIR_ATTR_VALUE "++";
+ }
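+        /* Illustrative note: XML_NVPAIR_ATTR_VALUE "++" yields the literal
+         * string "value++", which the attribute manager is assumed to
+         * interpret as "increment the current value by one".
+         */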
+
+ if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
+ opts |= pcmk__node_attr_remote;
+ }
+
+ crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
+ (ignore_failures? "last failure" : "failcount"),
+ rsc_id, on_uname, task, rc, value, now);
+
+ /* Update the fail count, if we're not ignoring failures */
+ if (!ignore_failures) {
+ fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
+ CRM_ASSERT(fail_pair != NULL);
+
+ fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
+ fail_pair->name = fail_name;
+ fail_pair->value = value;
+ fail_pair->node = on_uname;
+
+ attrs = g_list_prepend(attrs, fail_pair);
+ }
+
+ /* Update the last failure time (even if we're ignoring failures,
+ * so that failure can still be detected and shown, e.g. by crm_mon)
+ */
+ last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
+ CRM_ASSERT(last_pair != NULL);
+
+ last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
+ last_pair->name = last_name;
+ last_pair->value = now;
+ last_pair->node = on_uname;
+
+ attrs = g_list_prepend(attrs, last_pair);
+
+ update_attrd_list(attrs, opts);
+
+ free(fail_name);
+ free(fail_pair);
+
+ free(last_name);
+ free(last_pair);
+ g_list_free(attrs);
+
+ free(now);
+ }
+
+ bail:
+ free(rsc_id);
+ free(task);
+ return TRUE;
+}
+
+pcmk__graph_action_t *
+controld_get_action(int id)
+{
+ for (GList *item = controld_globals.transition_graph->synapses;
+ item != NULL; item = item->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
+
+ for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
+
+ if (action->id == id) {
+ return action;
+ }
+ }
+ }
+ return NULL;
+}
+
+pcmk__graph_action_t *
+get_cancel_action(const char *id, const char *node)
+{
+ GList *gIter = NULL;
+ GList *gIter2 = NULL;
+
+ gIter = controld_globals.transition_graph->synapses;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
+
+ gIter2 = synapse->actions;
+ for (; gIter2 != NULL; gIter2 = gIter2->next) {
+ const char *task = NULL;
+ const char *target = NULL;
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
+ continue;
+ }
+
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
+ crm_trace("Wrong key %s for %s on %s", task, id, node);
+ continue;
+ }
+
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
+ crm_trace("Wrong node %s for %s on %s", target, id, node);
+ continue;
+ }
+
+ crm_trace("Found %s on %s", id, node);
+ return action;
+ }
+ }
+
+ return NULL;
+}
+
+bool
+confirm_cancel_action(const char *id, const char *node_id)
+{
+ const char *op_key = NULL;
+ const char *node_name = NULL;
+ pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
+
+ if (cancel == NULL) {
+ return FALSE;
+ }
+ op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
+ node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
+
+ stop_te_timer(cancel);
+ te_action_confirmed(cancel, controld_globals.transition_graph);
+
+ crm_info("Cancellation of %s on %s confirmed (action %d)",
+ op_key, node_name, cancel->id);
+ return TRUE;
+}
+
+/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
+#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
+ "/" XML_CIB_TAG_NODE "[@" XML_ATTR_ID "='%s']"
+
+/*!
+ * \brief Find a transition event that would have made a specified node down
+ *
+ * \param[in] target UUID of node to match
+ *
+ * \return Matching event if found, NULL otherwise
+ */
+pcmk__graph_action_t *
+match_down_event(const char *target)
+{
+ pcmk__graph_action_t *match = NULL;
+ xmlXPathObjectPtr xpath_ret = NULL;
+ GList *gIter, *gIter2;
+
+ char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
+
+ for (gIter = controld_globals.transition_graph->synapses;
+ gIter != NULL && match == NULL;
+ gIter = gIter->next) {
+
+ for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
+ gIter2 != NULL && match == NULL;
+ gIter2 = gIter2->next) {
+
+ match = (pcmk__graph_action_t *) gIter2->data;
+ if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
+ xpath_ret = xpath_search(match->xml, xpath);
+ if (numXpathResults(xpath_ret) < 1) {
+ match = NULL;
+ }
+ freeXpathObject(xpath_ret);
+ } else {
+ // Only actions that were actually started can match
+ match = NULL;
+ }
+ }
+ }
+
+ free(xpath);
+
+ if (match != NULL) {
+ crm_debug("Shutdown action %d (%s) found for node %s", match->id,
+ crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
+ } else {
+ crm_debug("No reason to expect node %s to be down", target);
+ }
+ return match;
+}
+
+void
+process_graph_event(xmlNode *event, const char *event_node)
+{
+ int rc = -1; // Actual result
+ int target_rc = -1; // Expected result
+ int status = -1; // Executor status
+ int callid = -1; // Executor call ID
+ int transition_num = -1; // Transition number
+ int action_num = -1; // Action number within transition
+ char *update_te_uuid = NULL;
+ bool ignore_failures = FALSE;
+ const char *id = NULL;
+ const char *desc = NULL;
+ const char *magic = NULL;
+ const char *uname = NULL;
+
+ CRM_ASSERT(event != NULL);
+
+/*
+<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
+*/
+
+ magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
+ if (magic == NULL) {
+ /* non-change */
+ return;
+ }
+
+ crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
+ if (status == PCMK_EXEC_PENDING) {
+ return;
+ }
+
+ id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
+ crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
+ crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
+
+ rc = pcmk__effective_rc(rc);
+
+ if (decode_transition_key(magic, &update_te_uuid, &transition_num,
+ &action_num, &target_rc) == FALSE) {
+ // decode_transition_key() already logged the bad key
+ crm_err("Can't process action %s result: Incompatible versions? "
+ CRM_XS " call-id=%d", id, callid);
+ abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event);
+ return;
+ }
+
+ if (transition_num == -1) {
+ // E.g. crm_resource --fail
+ if (record_outside_event(action_num) != pcmk_rc_ok) {
+ crm_debug("Outside event with transition key '%s' has already been "
+ "processed", magic);
+ goto bail;
+ }
+ desc = "initiated outside of the cluster";
+ abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event",
+ event);
+
+ } else if ((action_num < 0)
+ || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
+ pcmk__str_none)) {
+ desc = "initiated by a different DC";
+ abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event);
+
+ } else if ((controld_globals.transition_graph->id != transition_num)
+ || controld_globals.transition_graph->complete) {
+
+ // Action is not from currently active transition
+
+ guint interval_ms = 0;
+
+ if (parse_op_key(id, NULL, NULL, &interval_ms)
+ && (interval_ms != 0)) {
+ /* Recurring actions have the transition number they were first
+ * scheduled in.
+ */
+
+ if (status == PCMK_EXEC_CANCELLED) {
+ confirm_cancel_action(id, get_node_id(event));
+ goto bail;
+ }
+
+ desc = "arrived after initial scheduling";
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Change in recurring result", event);
+
+ } else if (controld_globals.transition_graph->id != transition_num) {
+ desc = "arrived really late";
+ abort_transition(INFINITY, pcmk__graph_restart, "Old event", event);
+ } else {
+ desc = "arrived late";
+ abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph",
+ event);
+ }
+
+ } else {
+ // Event is result of an action from currently active transition
+ pcmk__graph_action_t *action = controld_get_action(action_num);
+
+ if (action == NULL) {
+ // Should never happen
+ desc = "unknown";
+ abort_transition(INFINITY, pcmk__graph_restart, "Unknown event",
+ event);
+
+ } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ /* Nothing further needs to be done if the action has already been
+ * confirmed. This can happen e.g. when processing both an
+ * "xxx_last_0" or "xxx_last_failure_0" record as well as the main
+ * history record, which would otherwise result in incorrectly
+ * bumping the fail count twice.
+ */
+ crm_log_xml_debug(event, "Event already confirmed:");
+ goto bail;
+
+ } else {
+ /* An action result needs to be confirmed.
+ * (This is the only case where desc == NULL.)
+ */
+
+ if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
+ ignore_failures = TRUE;
+
+ } else if (rc != target_rc) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ }
+
+ stop_te_timer(action);
+ te_action_confirmed(action, controld_globals.transition_graph);
+
+ if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
+ abort_transition(action->synapse->priority + 1,
+ pcmk__graph_restart, "Event failed", event);
+ }
+ }
+ }
+
+ if (id == NULL) {
+ id = "unknown action";
+ }
+ uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
+ if (uname == NULL) {
+ uname = "unknown node";
+ }
+
+ if (status == PCMK_EXEC_INVALID) {
+ // We couldn't attempt the action
+ crm_info("Transition %d action %d (%s on %s): %s",
+ transition_num, action_num, id, uname,
+ pcmk_exec_status_str(status));
+
+ } else if (desc && update_failcount(event, event_node, rc, target_rc,
+ (transition_num == -1), FALSE)) {
+ crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
+ CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(target_rc),
+ services_ocf_exitcode_str(rc),
+ target_rc, rc, callid, desc);
+
+ } else if (desc) {
+ crm_info("Transition %d action %d (%s on %s): %s "
+ CRM_XS " rc=%d target-rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ desc, rc, target_rc, callid);
+
+ } else if (rc == target_rc) {
+ crm_info("Transition %d action %d (%s on %s) confirmed: %s "
+ CRM_XS " rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(rc), rc, callid);
+
+ } else {
+ update_failcount(event, event_node, rc, target_rc,
+ (transition_num == -1), ignore_failures);
+ crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
+ CRM_XS " target-rc=%d rc=%d call-id=%d",
+ transition_num, action_num, id, uname,
+ services_ocf_exitcode_str(target_rc),
+ services_ocf_exitcode_str(rc),
+ target_rc, rc, callid);
+ }
+
+ bail:
+ free(update_te_uuid);
+}
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
new file mode 100644
index 0000000..ecbc0b2
--- /dev/null
+++ b/daemons/controld/controld_te_utils.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+//! Triggers transition graph processing
+static crm_trigger_t *transition_trigger = NULL;
+
+gboolean
+stop_te_timer(pcmk__graph_action_t *action)
+{
+ if (action == NULL) {
+ return FALSE;
+ }
+ if (action->timer != 0) {
+ crm_trace("Stopping action timer");
+ g_source_remove(action->timer);
+ action->timer = 0;
+ } else {
+ crm_trace("Action timer was already stopped");
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static gboolean
+te_graph_trigger(gpointer user_data)
+{
+ if (controld_globals.transition_graph == NULL) {
+ crm_debug("Nothing to do");
+ return TRUE;
+ }
+
+ crm_trace("Invoking graph %d in state %s",
+ controld_globals.transition_graph->id,
+ fsa_state2string(controld_globals.fsa_state));
+
+ switch (controld_globals.fsa_state) {
+ case S_STARTING:
+ case S_PENDING:
+ case S_NOT_DC:
+ case S_HALT:
+ case S_ILLEGAL:
+ case S_STOPPING:
+ case S_TERMINATE:
+ return TRUE;
+ default:
+ break;
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ enum pcmk__graph_status graph_rc;
+ int orig_limit = controld_globals.transition_graph->batch_limit;
+ int throttled_limit = throttle_get_total_job_limit(orig_limit);
+
+ controld_globals.transition_graph->batch_limit = throttled_limit;
+ graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph->batch_limit = orig_limit;
+
+ if (graph_rc == pcmk__graph_active) {
+ crm_trace("Transition not yet complete");
+ return TRUE;
+
+ } else if (graph_rc == pcmk__graph_pending) {
+ crm_trace("Transition not yet complete - no actions fired");
+ return TRUE;
+ }
+
+ if (graph_rc != pcmk__graph_complete) {
+ crm_warn("Transition failed: %s",
+ pcmk__graph_status2text(graph_rc));
+ pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
+ }
+ }
+
+ crm_debug("Transition %d is now complete",
+ controld_globals.transition_graph->id);
+ controld_globals.transition_graph->complete = true;
+ notify_crmd(controld_globals.transition_graph);
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Initialize transition trigger
+ */
+void
+controld_init_transition_trigger(void)
+{
+ transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
+ NULL);
+}
+
+/*!
+ * \internal
+ * \brief Destroy transition trigger
+ */
+void
+controld_destroy_transition_trigger(void)
+{
+ mainloop_destroy_trigger(transition_trigger);
+ transition_trigger = NULL;
+}
+
+void
+controld_trigger_graph_as(const char *fn, int line)
+{
+ crm_trace("%s:%d - Triggered graph processing", fn, line);
+ mainloop_set_trigger(transition_trigger);
+}
+
+static struct abort_timer_s {
+ bool aborted;
+ guint id;
+ int priority;
+ enum pcmk__graph_next action;
+ const char *text;
+} abort_timer = { 0, };
+
+static gboolean
+abort_timer_popped(gpointer data)
+{
+ if (AM_I_DC && (abort_timer.aborted == FALSE)) {
+ abort_transition(abort_timer.priority, abort_timer.action,
+ abort_timer.text, NULL);
+ }
+ abort_timer.id = 0;
+ return FALSE; // do not immediately reschedule timer
+}
+
+/*!
+ * \internal
+ * \brief Abort transition after delay, if not already aborted in that time
+ *
+ * \param[in] abort_text Must be literal string
+ */
+void
+abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, guint delay_ms)
+{
+ if (abort_timer.id) {
+ // Timer already in progress, stop and reschedule
+ g_source_remove(abort_timer.id);
+ }
+ abort_timer.aborted = FALSE;
+ abort_timer.priority = abort_priority;
+ abort_timer.action = abort_action;
+ abort_timer.text = abort_text;
+ abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
+}
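+
+/* Illustrative usage: schedule a restart-priority abort in five seconds
+ * unless some other abort fires first (which sets abort_timer.aborted and
+ * suppresses this one):
+ *
+ *   abort_after_delay(INFINITY, pcmk__graph_restart, "Example reason", 5000);
+ */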
+
+static const char *
+abort2text(enum pcmk__graph_next abort_action)
+{
+ switch (abort_action) {
+ case pcmk__graph_done: return "done";
+ case pcmk__graph_wait: return "stop";
+ case pcmk__graph_restart: return "restart";
+ case pcmk__graph_shutdown: return "shutdown";
+ }
+ return "unknown";
+}
+
+static bool
+update_abort_priority(pcmk__graph_t *graph, int priority,
+ enum pcmk__graph_next action, const char *abort_reason)
+{
+ bool change = FALSE;
+
+ if (graph == NULL) {
+ return change;
+ }
+
+ if (graph->abort_priority < priority) {
+ crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
+ graph->abort_priority = priority;
+ if (graph->abort_reason != NULL) {
+ crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
+ }
+ graph->abort_reason = abort_reason;
+ change = TRUE;
+ }
+
+ if (graph->completion_action < action) {
+ crm_debug("Abort action %s superseded by %s: %s",
+ abort2text(graph->completion_action), abort2text(action), abort_reason);
+ graph->completion_action = action;
+ change = TRUE;
+ }
+
+ return change;
+}
+
+void
+abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, const xmlNode *reason,
+ const char *fn, int line)
+{
+ int add[] = { 0, 0, 0 };
+ int del[] = { 0, 0, 0 };
+ int level = LOG_INFO;
+ const xmlNode *diff = NULL;
+ const xmlNode *change = NULL;
+
+ CRM_CHECK(controld_globals.transition_graph != NULL, return);
+
+ switch (controld_globals.fsa_state) {
+ case S_STARTING:
+ case S_PENDING:
+ case S_NOT_DC:
+ case S_HALT:
+ case S_ILLEGAL:
+ case S_STOPPING:
+ case S_TERMINATE:
+ crm_info("Abort %s suppressed: state=%s (%scomplete)",
+ abort_text, fsa_state2string(controld_globals.fsa_state),
+ (controld_globals.transition_graph->complete? "" : "in"));
+ return;
+ default:
+ break;
+ }
+
+ abort_timer.aborted = TRUE;
+ controld_expect_sched_reply(NULL);
+
+ if (!controld_globals.transition_graph->complete
+ && update_abort_priority(controld_globals.transition_graph,
+ abort_priority, abort_action,
+ abort_text)) {
+ level = LOG_NOTICE;
+ }
+
+ if (reason != NULL) {
+ const xmlNode *search = NULL;
+
+ for(search = reason; search; search = search->parent) {
+ if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) {
+ diff = search;
+ break;
+ }
+ }
+
+ if(diff) {
+ xml_patch_versions(diff, add, del);
+ for(search = reason; search; search = search->parent) {
+ if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) {
+ change = search;
+ break;
+ }
+ }
+ }
+ }
+
+ if (reason == NULL) {
+ do_crm_log(level,
+ "Transition %d aborted: %s " CRM_XS " source=%s:%d "
+ "complete=%s", controld_globals.transition_graph->id,
+ abort_text, fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if(change == NULL) {
+ GString *local_path = pcmk__element_xpath(reason);
+ CRM_ASSERT(local_path != NULL);
+
+ do_crm_log(level, "Transition %d aborted by %s.%s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id, TYPE(reason),
+ ID(reason), abort_text, add[0], add[1], add[2], fn, line,
+ (const char *) local_path->str,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+ g_string_free(local_path, TRUE);
+
+ } else {
+ const char *kind = NULL;
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *path = crm_element_value(change, XML_DIFF_PATH);
+
+ if(change == reason) {
+ if(strcmp(op, "create") == 0) {
+ reason = reason->children;
+
+ } else if(strcmp(op, "modify") == 0) {
+ reason = first_named_child(reason, XML_DIFF_RESULT);
+ if(reason) {
+ reason = reason->children;
+ }
+ }
+ }
+
+ kind = TYPE(reason);
+ if(strcmp(op, "delete") == 0) {
+ const char *shortpath = strrchr(path, '/');
+
+ do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ (shortpath? (shortpath + 1) : path), abort_text,
+ add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_none)) {
+ do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ crm_element_value(reason, XML_ATTR_ID), op,
+ crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
+ crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
+ abort_text, add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_none)) {
+ const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
+
+ do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
+ CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
+ controld_globals.transition_graph->id,
+ crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
+ crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
+ magic, add[0], add[1], add[2], fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else if (pcmk__str_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) {
+ const char *uname = crm_peer_uname(ID(reason));
+
+ do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
+ controld_globals.transition_graph->id,
+ kind, op, (uname? uname : ID(reason)), abort_text,
+ add[0], add[1], add[2], fn, line,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+
+ } else {
+ const char *id = ID(reason);
+
+ do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
+ CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
+ controld_globals.transition_graph->id,
+ TYPE(reason), (id? id : ""), (op? op : "change"),
+ abort_text, add[0], add[1], add[2], fn, line, path,
+ pcmk__btoa(controld_globals.transition_graph->complete));
+ }
+ }
+
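+    /* If the transition that was just aborted is already complete, schedule
+     * a new scheduler run: after the transition timer when a
+     * transition-delay is configured, or immediately otherwise.
+     */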
+ if (controld_globals.transition_graph->complete) {
+ if (controld_get_period_transition_timer() > 0) {
+ controld_stop_transition_timer();
+ controld_start_transition_timer();
+ } else {
+ register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
+ }
+ return;
+ }
+
+ trigger_graph();
+}
diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c
new file mode 100644
index 0000000..5b7f9c0
--- /dev/null
+++ b/daemons/controld/controld_throttle.c
@@ -0,0 +1,574 @@
+/*
+ * Copyright 2013-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <ctype.h>
+#include <dirent.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+/* These values don't need to be bits, but these particular values must be kept
+ * for backward compatibility during rolling upgrades.
+ */
+enum throttle_state_e {
+ throttle_none = 0x0000,
+ throttle_low = 0x0001,
+ throttle_med = 0x0010,
+ throttle_high = 0x0100,
+ throttle_extreme = 0x1000,
+};
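+
+/* The increasing numeric order also matters: throttle_mode() keeps the more
+ * severe of two readings by comparing modes with '>', and peers exchange
+ * these raw values in cluster messages, so an older node's 0x0100 must
+ * still mean throttle_high here.
+ */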
+
+struct throttle_record_s {
+ int max;
+ enum throttle_state_e mode;
+ char *node;
+};
+
+static int throttle_job_max = 0;
+static float throttle_load_target = 0.0;
+
+#define THROTTLE_FACTOR_LOW 1.2
+#define THROTTLE_FACTOR_MEDIUM 1.6
+#define THROTTLE_FACTOR_HIGH 2.0
+
+static GHashTable *throttle_records = NULL;
+static mainloop_timer_t *throttle_timer = NULL;
+
+static const char *
+load2str(enum throttle_state_e mode)
+{
+ switch (mode) {
+ case throttle_extreme: return "extreme";
+ case throttle_high: return "high";
+ case throttle_med: return "medium";
+ case throttle_low: return "low";
+ case throttle_none: return "negligible";
+ default: return "undetermined";
+ }
+}
+
+#if HAVE_LINUX_PROCFS
+/*!
+ * \internal
+ * \brief Return name of /proc file containing the CIB daemon's load statistics
+ *
+ * \return Newly allocated memory with file name on success, NULL otherwise
+ *
+ * \note It is the caller's responsibility to free the return value.
+ * This will return NULL if the daemon is being run via valgrind.
+ * This should be called only on Linux systems.
+ */
+static char *
+find_cib_loadfile(void)
+{
+ pid_t pid = pcmk__procfs_pid_of("pacemaker-based");
+
+ return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL;
+}
+
+static bool
+throttle_cib_load(float *load)
+{
+/*
+ /proc/[pid]/stat
+ Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c.
+
+ The fields, in order, with their proper scanf(3) format specifiers, are:
+
+ pid %d (1) The process ID.
+
+ comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out.
+
+ state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging.
+
+ ppid %d (4) The PID of the parent.
+
+ pgrp %d (5) The process group ID of the process.
+
+ session %d (6) The session ID of the process.
+
+ tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.)
+
+ tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process.
+
+ flags %u (%lu before Linux 2.6.22)
+ (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version.
+
+ minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk.
+
+ cminflt %lu (11) The number of minor faults that the process's waited-for children have made.
+
+ majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk.
+
+ cmajflt %lu (13) The number of major faults that the process's waited-for children have made.
+
+ utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations.
+
+ stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
+ */
+
+ static char *loadfile = NULL;
+ static time_t last_call = 0;
+ static long ticks_per_s = 0;
+ static unsigned long last_utime, last_stime;
+
+ char buffer[64*1024];
+ FILE *stream = NULL;
+ time_t now = time(NULL);
+
+ if(load == NULL) {
+ return FALSE;
+ } else {
+ *load = 0.0;
+ }
+
+ if(loadfile == NULL) {
+ last_call = 0;
+ last_utime = 0;
+ last_stime = 0;
+ loadfile = find_cib_loadfile();
+ if (loadfile == NULL) {
+ crm_warn("Couldn't find CIB load file");
+ return FALSE;
+ }
+ ticks_per_s = sysconf(_SC_CLK_TCK);
+ crm_trace("Found %s", loadfile);
+ }
+
+ stream = fopen(loadfile, "r");
+ if(stream == NULL) {
+ int rc = errno;
+
+ crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
+ free(loadfile); loadfile = NULL;
+ return FALSE;
+ }
+
+ if(fgets(buffer, sizeof(buffer), stream)) {
+ char *comm = calloc(1, 256);
+ char state = 0;
+ int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
+ unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
+
+ rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
+ &pid, comm, &state,
+ &ppid, &pgrp, &session, &tty_nr, &tpgid,
+ &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
+ free(comm);
+
+ if(rc != 15) {
+ crm_err("Only %d of 15 fields found in %s", rc, loadfile);
+ fclose(stream);
+ return FALSE;
+
+ } else if(last_call > 0
+ && last_call < now
+ && last_utime <= utime
+ && last_stime <= stime) {
+
+ time_t elapsed = now - last_call;
+ unsigned long delta_utime = utime - last_utime;
+ unsigned long delta_stime = stime - last_stime;
+
+            *load = (delta_utime + delta_stime); /* assigned to a float, so the divisions below are floating-point */
+ *load /= ticks_per_s;
+ *load /= elapsed;
+ crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);
+
+ } else {
+ crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
+ }
+
+ last_call = now;
+ last_utime = utime;
+ last_stime = stime;
+
+ fclose(stream);
+ return TRUE;
+ }
+
+ fclose(stream);
+ return FALSE;
+}
+
+static bool
+throttle_load_avg(float *load)
+{
+ char buffer[256];
+ FILE *stream = NULL;
+ const char *loadfile = "/proc/loadavg";
+
+ if(load == NULL) {
+ return FALSE;
+ }
+
+ stream = fopen(loadfile, "r");
+ if(stream == NULL) {
+ int rc = errno;
+ crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
+ return FALSE;
+ }
+
+ if(fgets(buffer, sizeof(buffer), stream)) {
+ char *nl = strstr(buffer, "\n");
+
+ /* Grab the 1-minute average, ignore the rest */
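+        /* e.g. a /proc/loadavg line of "0.41 0.30 0.28 2/512 32168"
+         * yields *load == 0.41 */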
+ *load = strtof(buffer, NULL);
+ if(nl) { nl[0] = 0; }
+
+ fclose(stream);
+ return TRUE;
+ }
+
+ fclose(stream);
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Check a load value against throttling thresholds
+ *
+ * \param[in] load Load value to check
+ * \param[in] desc Description of metric (for logging)
+ * \param[in] thresholds Low/medium/high/extreme thresholds
+ *
+ * \return Throttle mode corresponding to load value
+ */
+static enum throttle_state_e
+throttle_check_thresholds(float load, const char *desc,
+ const float thresholds[4])
+{
+ if (load > thresholds[3]) {
+ crm_notice("Extreme %s detected: %f", desc, load);
+ return throttle_extreme;
+
+ } else if (load > thresholds[2]) {
+ crm_notice("High %s detected: %f", desc, load);
+ return throttle_high;
+
+ } else if (load > thresholds[1]) {
+ crm_info("Moderate %s detected: %f", desc, load);
+ return throttle_med;
+
+ } else if (load > thresholds[0]) {
+ crm_debug("Noticeable %s detected: %f", desc, load);
+ return throttle_low;
+ }
+
+ crm_trace("Negligible %s detected: %f", desc, load);
+ return throttle_none;
+}
+
+static enum throttle_state_e
+throttle_handle_load(float load, const char *desc, int cores)
+{
+ float normalize;
+ float thresholds[4];
+
+ if (cores == 1) {
+ /* On a single core machine, a load of 1.0 is already too high */
+ normalize = 0.6;
+
+ } else {
+ /* Normalize the load to be per-core */
+ normalize = cores;
+ }
+ thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
+ thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
+ thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
+ thresholds[3] = load + 1.0; /* never extreme */
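+
+    /* Worked example (hypothetical values): with load-threshold=80%
+     * (throttle_load_target = 0.8) on a 4-core host, normalize = 4 and the
+     * thresholds become 0.8*4*1.2 = 3.84 (low), 0.8*4*1.6 = 5.12 (medium)
+     * and 0.8*4*2.0 = 6.4 (high); the load average can never be "extreme".
+     */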
+
+ return throttle_check_thresholds(load, desc, thresholds);
+}
+#endif // HAVE_LINUX_PROCFS
+
+static enum throttle_state_e
+throttle_mode(void)
+{
+ enum throttle_state_e mode = throttle_none;
+
+#if HAVE_LINUX_PROCFS
+ unsigned int cores;
+ float load;
+ float thresholds[4];
+
+ cores = pcmk__procfs_num_cores();
+ if(throttle_cib_load(&load)) {
+ float cib_max_cpu = 0.95;
+
+ /* The CIB is a single-threaded task and thus cannot consume
+ * more than 100% of a CPU (and 1/cores of the overall system
+ * load).
+ *
+ * On a many-cored system, the CIB might therefore be maxed out
+ * (causing operations to fail or appear to fail) even though
+ * the overall system load is still reasonable.
+ *
+         * Therefore, the 'normal' thresholds cannot apply here, and we
+ * need a special case.
+ */
+ if(cores == 1) {
+ cib_max_cpu = 0.4;
+ }
+ if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
+ cib_max_cpu = throttle_load_target;
+ }
+
+ thresholds[0] = cib_max_cpu * 0.8;
+ thresholds[1] = cib_max_cpu * 0.9;
+ thresholds[2] = cib_max_cpu;
+ /* Can only happen on machines with a low number of cores */
+ thresholds[3] = cib_max_cpu * 1.5;
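+
+        /* e.g. with the default cib_max_cpu of 0.95, the thresholds work
+         * out to 0.76, 0.855, 0.95 and 1.425 respectively */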
+
+ mode = throttle_check_thresholds(load, "CIB load", thresholds);
+ }
+
+ if(throttle_load_target <= 0) {
+        /* A non-positive target disables load-based throttling; if we ever
+         * make this a valid value, the cluster will at least behave as
+         * expected */
+ return mode;
+ }
+
+ if(throttle_load_avg(&load)) {
+ enum throttle_state_e cpu_load;
+
+ cpu_load = throttle_handle_load(load, "CPU load", cores);
+ if (cpu_load > mode) {
+ mode = cpu_load;
+ }
+ crm_debug("Current load is %f across %u core(s)", load, cores);
+ }
+#endif // HAVE_LINUX_PROCFS
+ return mode;
+}
+
+static void
+throttle_send_command(enum throttle_state_e mode)
+{
+ xmlNode *xml = NULL;
+ static enum throttle_state_e last = -1;
+
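+    /* Broadcast only when the mode changes; the 30-second throttle timer
+     * invokes this repeatedly, and peers only need to hear transitions.
+     */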
+ if(mode != last) {
+ crm_info("New throttle mode: %s load (was %s)",
+ load2str(mode), load2str(last));
+ last = mode;
+
+ xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
+ crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
+
+ send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
+ free_xml(xml);
+ }
+}
+
+static gboolean
+throttle_timer_cb(gpointer data)
+{
+ throttle_send_command(throttle_mode());
+ return TRUE;
+}
+
+static void
+throttle_record_free(gpointer p)
+{
+ struct throttle_record_s *r = p;
+ free(r->node);
+ free(r);
+}
+
+static void
+throttle_set_load_target(float target)
+{
+ throttle_load_target = target;
+}
+
+/*!
+ * \internal
+ * \brief Update the maximum number of simultaneous jobs
+ *
+ * \param[in] preference Cluster-wide node-action-limit from the CIB
+ */
+static void
+throttle_update_job_max(const char *preference)
+{
+ long long max = 0LL;
+ const char *env_limit = getenv("PCMK_node_action_limit");
+
+ if (env_limit != NULL) {
+ preference = env_limit; // Per-node override
+ }
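+    /* e.g. PCMK_node_action_limit=4 in the environment wins over any
+     * node-action-limit in the CIB; with neither set, a 4-core node
+     * defaults to 2 * 4 = 8 concurrent jobs (hypothetical values).
+     */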
+ if (preference != NULL) {
+ pcmk__scan_ll(preference, &max, 0LL);
+ }
+ if (max > 0) {
+ throttle_job_max = (int) max;
+ } else {
+ // Default is based on the number of cores detected
+ throttle_job_max = 2 * pcmk__procfs_num_cores();
+ }
+}
+
+void
+throttle_init(void)
+{
+ if(throttle_records == NULL) {
+ throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
+ throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
+ }
+
+ throttle_update_job_max(NULL);
+ mainloop_timer_start(throttle_timer);
+}
+
+/*!
+ * \internal
+ * \brief Configure throttle options based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_throttle(GHashTable *options)
+{
+ const char *value = g_hash_table_lookup(options, "load-threshold");
+
+ if (value != NULL) {
+ throttle_set_load_target(strtof(value, NULL) / 100.0);
+ }
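+    /* e.g. load-threshold="80%" parses as 80.0 and is stored as a
+     * per-core utilization target of 0.8 */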
+
+ value = g_hash_table_lookup(options, "node-action-limit");
+ throttle_update_job_max(value);
+}
+
+void
+throttle_fini(void)
+{
+ if (throttle_timer != NULL) {
+ mainloop_timer_del(throttle_timer);
+ throttle_timer = NULL;
+ }
+ if (throttle_records != NULL) {
+ g_hash_table_destroy(throttle_records);
+ throttle_records = NULL;
+ }
+}
+
+int
+throttle_get_total_job_limit(int l)
+{
+ /* Cluster-wide limit */
+ GHashTableIter iter;
+ int limit = l;
+ int peers = crm_active_peers();
+ struct throttle_record_s *r = NULL;
+
+ g_hash_table_iter_init(&iter, throttle_records);
+
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
+ switch(r->mode) {
+
+ case throttle_extreme:
+ if(limit == 0 || limit > peers/4) {
+ limit = QB_MAX(1, peers/4);
+ }
+ break;
+
+ case throttle_high:
+ if(limit == 0 || limit > peers/2) {
+ limit = QB_MAX(1, peers/2);
+ }
+ break;
+ default:
+ break;
+ }
+ }
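+    /* Worked example (hypothetical): with 8 active peers and no configured
+     * batch-limit (l == 0), one peer at extreme load caps the limit at
+     * QB_MAX(1, 8/4) = 2, while one at high load would cap it at
+     * QB_MAX(1, 8/2) = 4.
+     */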
+ if(limit == l) {
+ /* crm_trace("No change to batch-limit=%d", limit); */
+
+ } else if(l == 0) {
+ crm_trace("Using batch-limit=%d", limit);
+
+ } else {
+ crm_trace("Using batch-limit=%d instead of %d", limit, l);
+ }
+ return limit;
+}
+
+int
+throttle_get_job_limit(const char *node)
+{
+ int jobs = 1;
+ struct throttle_record_s *r = NULL;
+
+ r = g_hash_table_lookup(throttle_records, node);
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct throttle_record_s));
+ r->node = strdup(node);
+ r->mode = throttle_low;
+ r->max = throttle_job_max;
+ crm_trace("Defaulting to local values for unknown node %s", node);
+
+ g_hash_table_insert(throttle_records, r->node, r);
+ }
+
+ switch(r->mode) {
+ case throttle_extreme:
+ case throttle_high:
+ jobs = 1; /* At least one job must always be allowed */
+ break;
+ case throttle_med:
+ jobs = QB_MAX(1, r->max / 4);
+ break;
+ case throttle_low:
+ jobs = QB_MAX(1, r->max / 2);
+ break;
+ case throttle_none:
+ jobs = QB_MAX(1, r->max);
+ break;
+ default:
+ crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
+ break;
+ }
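+    /* e.g. with a job max of 16: negligible load allows 16 jobs, low
+     * allows 8, medium allows 4, and high or extreme allow only 1 */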
+ return jobs;
+}
+
+void
+throttle_update(xmlNode *xml)
+{
+ int max = 0;
+ int mode = 0;
+ struct throttle_record_s *r = NULL;
+ const char *from = crm_element_value(xml, F_CRM_HOST_FROM);
+
+ crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode);
+ crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max);
+
+ r = g_hash_table_lookup(throttle_records, from);
+
+ if(r == NULL) {
+ r = calloc(1, sizeof(struct throttle_record_s));
+ r->node = strdup(from);
+ g_hash_table_insert(throttle_records, r->node, r);
+ }
+
+ r->max = max;
+ r->mode = (enum throttle_state_e) mode;
+
+ crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
+ from, load2str((enum throttle_state_e) mode), max,
+ throttle_get_job_limit(from));
+}
diff --git a/daemons/controld/controld_throttle.h b/daemons/controld/controld_throttle.h
new file mode 100644
index 0000000..a798c6c
--- /dev/null
+++ b/daemons/controld/controld_throttle.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2013-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+void throttle_init(void);
+void throttle_fini(void);
+void controld_configure_throttle(GHashTable *options);
+
+void throttle_update(xmlNode *xml);
+int throttle_get_job_limit(const char *node);
+int throttle_get_total_job_limit(int l);
diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c
new file mode 100644
index 0000000..a65bef5
--- /dev/null
+++ b/daemons/controld/controld_timers.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <time.h>
+#include <stdlib.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-controld.h>
+
+//! FSA mainloop timer type
+typedef struct fsa_timer_s {
+ guint source_id; //!< Timer source ID
+ guint period_ms; //!< Timer period
+ enum crmd_fsa_input fsa_input; //!< Input to register if timer pops
+    gboolean (*callback) (gpointer data); //!< What to do if the timer pops
+ bool log_error; //!< Timer popping indicates error
+ int counter; //!< For detecting loops
+} fsa_timer_t;
+
+//! Wait before retrying a failed CIB or executor connection
+static fsa_timer_t *wait_timer = NULL;
+
+//! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
+static fsa_timer_t *recheck_timer = NULL;
+
+//! Wait at start-up, or after an election, for DC to make contact
+static fsa_timer_t *election_timer = NULL;
+
+//! Delay start of a new transition with the expectation that something else might happen
+static fsa_timer_t *transition_timer = NULL;
+
+//! join-integration-timeout
+static fsa_timer_t *integration_timer = NULL;
+
+//! join-finalization-timeout
+static fsa_timer_t *finalization_timer = NULL;
+
+//! Wait for DC to stop all resources and give us the all-clear to shut down
+fsa_timer_t *shutdown_escalation_timer = NULL;
+
+//! Cluster recheck interval (from configuration)
+static guint recheck_interval_ms = 0;
+
+static const char *
+get_timer_desc(fsa_timer_t * timer)
+{
+ if (timer == election_timer) {
+ return "Election Trigger";
+
+ } else if (timer == shutdown_escalation_timer) {
+ return "Shutdown Escalation";
+
+ } else if (timer == integration_timer) {
+ return "Integration Timer";
+
+ } else if (timer == finalization_timer) {
+ return "Finalization Timer";
+
+ } else if (timer == transition_timer) {
+ return "New Transition Timer";
+
+ } else if (timer == wait_timer) {
+ return "Wait Timer";
+
+ } else if (timer == recheck_timer) {
+ return "Cluster Recheck Timer";
+
+ }
+ return "Unknown Timer";
+}
+
+/*!
+ * \internal
+ * \brief Stop an FSA timer
+ *
+ * \param[in,out] timer Timer to stop
+ *
+ * \return true if the timer was running, or false otherwise
+ */
+static bool
+controld_stop_timer(fsa_timer_t *timer)
+{
+ CRM_CHECK(timer != NULL, return false);
+
+ if (timer->source_id != 0) {
+ crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ g_source_remove(timer->source_id);
+ timer->source_id = 0;
+
+ } else {
+ crm_trace("%s already stopped (would inject %s if popped after %ums)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms);
+ return false;
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Start an FSA timer
+ *
+ * \param[in,out] timer Timer to start
+ */
+static void
+controld_start_timer(fsa_timer_t *timer)
+{
+ if (timer->source_id == 0 && timer->period_ms > 0) {
+ timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
+ CRM_ASSERT(timer->source_id != 0);
+ crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ } else {
+ crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms, timer->source_id);
+ }
+}
+
+/* A_DC_TIMER_STOP, A_DC_TIMER_START,
+ * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
+ * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
+ */
+void
+do_timer_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ gboolean timer_op_ok = TRUE;
+
+ if (action & A_DC_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(election_timer);
+
+ } else if (action & A_FINALIZE_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(finalization_timer);
+
+ } else if (action & A_INTEGRATE_TIMER_STOP) {
+ timer_op_ok = controld_stop_timer(integration_timer);
+ }
+
+    /* Don't restart the election timer unless it was running before the stop above */
+ if (action & A_DC_TIMER_START && timer_op_ok) {
+ controld_start_timer(election_timer);
+ if (AM_I_DC) {
+ /* there can be only one */
+ register_fsa_input(cause, I_ELECTION, NULL);
+ }
+
+ } else if (action & A_FINALIZE_TIMER_START) {
+ controld_start_timer(finalization_timer);
+
+ } else if (action & A_INTEGRATE_TIMER_START) {
+ controld_start_timer(integration_timer);
+ }
+}
+
+static gboolean
+crm_timer_popped(gpointer data)
+{
+ fsa_timer_t *timer = (fsa_timer_t *) data;
+
+ if (timer->log_error) {
+ crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
+ get_timer_desc(timer),
+ fsa_state2string(controld_globals.fsa_state),
+ fsa_input2string(timer->fsa_input), timer->period_ms);
+ } else {
+ crm_info("%s just popped " CRM_XS " input=%s time=%ums",
+ get_timer_desc(timer), fsa_input2string(timer->fsa_input),
+ timer->period_ms);
+ timer->counter++;
+ }
+
+ if ((timer == election_timer) && (election_timer->counter > 5)) {
+ crm_notice("We appear to be in an election loop, something may be wrong");
+ crm_write_blackbox(0, NULL);
+ election_timer->counter = 0;
+ }
+
+ controld_stop_timer(timer); // Make timer _not_ go off again
+
+ if (timer->fsa_input == I_INTEGRATED) {
+ crm_info("Welcomed: %d, Integrated: %d",
+ crmd_join_phase_count(crm_join_welcomed),
+ crmd_join_phase_count(crm_join_integrated));
+ if (crmd_join_phase_count(crm_join_welcomed) == 0) {
+ // If we don't even have ourselves, start again
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
+ __func__);
+
+ } else {
+ register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
+ }
+
+ } else if ((timer == recheck_timer)
+ && (controld_globals.fsa_state != S_IDLE)) {
+ crm_debug("Discarding %s event in state: %s",
+ fsa_input2string(timer->fsa_input),
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else if ((timer == finalization_timer)
+ && (controld_globals.fsa_state != S_FINALIZE_JOIN)) {
+ crm_debug("Discarding %s event in state: %s",
+ fsa_input2string(timer->fsa_input),
+ fsa_state2string(controld_globals.fsa_state));
+
+ } else if (timer->fsa_input != I_NULL) {
+ register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
+ }
+
+ controld_trigger_fsa();
+
+ return TRUE;
+}
+
+bool
+controld_init_fsa_timers(void)
+{
+ transition_timer = calloc(1, sizeof(fsa_timer_t));
+ if (transition_timer == NULL) {
+ return FALSE;
+ }
+
+ integration_timer = calloc(1, sizeof(fsa_timer_t));
+ if (integration_timer == NULL) {
+ return FALSE;
+ }
+
+ finalization_timer = calloc(1, sizeof(fsa_timer_t));
+ if (finalization_timer == NULL) {
+ return FALSE;
+ }
+
+ election_timer = calloc(1, sizeof(fsa_timer_t));
+ if (election_timer == NULL) {
+ return FALSE;
+ }
+
+ shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
+ if (shutdown_escalation_timer == NULL) {
+ return FALSE;
+ }
+
+ wait_timer = calloc(1, sizeof(fsa_timer_t));
+ if (wait_timer == NULL) {
+ return FALSE;
+ }
+
+ recheck_timer = calloc(1, sizeof(fsa_timer_t));
+ if (recheck_timer == NULL) {
+ return FALSE;
+ }
+
+ election_timer->source_id = 0;
+ election_timer->period_ms = 0;
+ election_timer->fsa_input = I_DC_TIMEOUT;
+ election_timer->callback = crm_timer_popped;
+ election_timer->log_error = FALSE;
+
+ transition_timer->source_id = 0;
+ transition_timer->period_ms = 0;
+ transition_timer->fsa_input = I_PE_CALC;
+ transition_timer->callback = crm_timer_popped;
+ transition_timer->log_error = FALSE;
+
+ integration_timer->source_id = 0;
+ integration_timer->period_ms = 0;
+ integration_timer->fsa_input = I_INTEGRATED;
+ integration_timer->callback = crm_timer_popped;
+ integration_timer->log_error = TRUE;
+
+ finalization_timer->source_id = 0;
+ finalization_timer->period_ms = 0;
+ finalization_timer->fsa_input = I_FINALIZED;
+ finalization_timer->callback = crm_timer_popped;
+ finalization_timer->log_error = FALSE;
+
+    /* We can't use I_FINALIZED here, because that creates a bug in the join
+     * process where a joining node can be stuck in S_PENDING while we think
+     * it is in S_NOT_DC. That led to an infinite transition loop in which we
+     * continually sent probes that the node NACKed because it was pending.
+     *
+     * Using I_ELECTION instead means that, when some nodes have the cluster
+     * layer active but not the controller, we avoid an election/join loop
+     * during the integration phase.
+     */
+ finalization_timer->fsa_input = I_ELECTION;
+
+ shutdown_escalation_timer->source_id = 0;
+ shutdown_escalation_timer->period_ms = 0;
+ shutdown_escalation_timer->fsa_input = I_STOP;
+ shutdown_escalation_timer->callback = crm_timer_popped;
+ shutdown_escalation_timer->log_error = TRUE;
+
+ wait_timer->source_id = 0;
+ wait_timer->period_ms = 2000;
+ wait_timer->fsa_input = I_NULL;
+ wait_timer->callback = crm_timer_popped;
+ wait_timer->log_error = FALSE;
+
+ recheck_timer->source_id = 0;
+ recheck_timer->period_ms = 0;
+ recheck_timer->fsa_input = I_PE_CALC;
+ recheck_timer->callback = crm_timer_popped;
+ recheck_timer->log_error = FALSE;
+
+ return TRUE;
+}
+
+/*!
+ * \internal
+ * \brief Configure timers based on the CIB
+ *
+ * \param[in,out] options Name/value pairs for configured options
+ */
+void
+controld_configure_fsa_timers(GHashTable *options)
+{
+ const char *value = NULL;
+
+ // Election timer
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_DC_DEADTIME);
+ election_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Integration timer
+ value = g_hash_table_lookup(options, "join-integration-timeout");
+ integration_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Finalization timer
+ value = g_hash_table_lookup(options, "join-finalization-timeout");
+ finalization_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Shutdown escalation timer
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FORCE_QUIT);
+ shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
+ crm_debug("Shutdown escalation occurs if DC has not responded to request "
+ "in %ums", shutdown_escalation_timer->period_ms);
+
+ // Transition timer
+ value = g_hash_table_lookup(options, "transition-delay");
+ transition_timer->period_ms = crm_parse_interval_spec(value);
+
+ // Recheck interval
+ value = g_hash_table_lookup(options, XML_CONFIG_ATTR_RECHECK);
+ recheck_interval_ms = crm_parse_interval_spec(value);
+ crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
+}
+
+void
+controld_free_fsa_timers(void)
+{
+ controld_stop_timer(transition_timer);
+ controld_stop_timer(integration_timer);
+ controld_stop_timer(finalization_timer);
+ controld_stop_timer(election_timer);
+ controld_stop_timer(shutdown_escalation_timer);
+ controld_stop_timer(wait_timer);
+ controld_stop_timer(recheck_timer);
+
+ free(transition_timer); transition_timer = NULL;
+ free(integration_timer); integration_timer = NULL;
+ free(finalization_timer); finalization_timer = NULL;
+ free(election_timer); election_timer = NULL;
+ free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
+ free(wait_timer); wait_timer = NULL;
+ free(recheck_timer); recheck_timer = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Check whether the transition timer is started
+ * \return true if the transition timer is started, or false otherwise
+ */
+bool
+controld_is_started_transition_timer(void)
+{
+ return (transition_timer->period_ms > 0)
+ && (transition_timer->source_id != 0);
+}
+
+/*!
+ * \internal
+ * \brief Start the recheck timer
+ */
+void
+controld_start_recheck_timer(void)
+{
+ // Default to recheck interval configured in CIB (if any)
+ guint period_ms = recheck_interval_ms;
+
+ // If scheduler supplied a "recheck by" time, check whether that's sooner
+ if (controld_globals.transition_graph->recheck_by > 0) {
+ time_t diff_seconds = controld_globals.transition_graph->recheck_by
+ - time(NULL);
+
+ if (diff_seconds < 1) {
+ // We're already past the desired time
+ period_ms = 500;
+ } else {
+ period_ms = (guint) diff_seconds * 1000;
+ }
+
+ // Use "recheck by" only if it's sooner than interval from CIB
+ if (period_ms > recheck_interval_ms) {
+ period_ms = recheck_interval_ms;
+ }
+ }
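+    /* e.g. with cluster-recheck-interval=15min but a scheduler-supplied
+     * recheck_by only 10 seconds away, the timer is set to 10000ms
+     * (hypothetical values) */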
+
+ if (period_ms > 0) {
+ recheck_timer->period_ms = period_ms;
+ controld_start_timer(recheck_timer);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Start the wait timer
+ */
+void
+controld_start_wait_timer(void)
+{
+ controld_start_timer(wait_timer);
+}
+
+/*!
+ * \internal
+ * \brief Stop the recheck timer
+ *
+ * \return true if the recheck timer was running, or false otherwise
+ */
+bool
+controld_stop_recheck_timer(void)
+{
+ return controld_stop_timer(recheck_timer);
+}
+
+/*!
+ * \internal
+ * \brief Get the transition timer's configured period
+ * \return The transition_timer's period
+ */
+guint
+controld_get_period_transition_timer(void)
+{
+ return transition_timer->period_ms;
+}
+
+/*!
+ * \internal
+ * \brief Reset the election timer's counter to 0
+ */
+void
+controld_reset_counter_election_timer(void)
+{
+ election_timer->counter = 0;
+}
+
+/*!
+ * \internal
+ * \brief Stop the transition timer
+ *
+ * \return true if the transition timer was running, or false otherwise
+ */
+bool
+controld_stop_transition_timer(void)
+{
+ return controld_stop_timer(transition_timer);
+}
+
+/*!
+ * \internal
+ * \brief Start the transition timer
+ */
+void
+controld_start_transition_timer(void)
+{
+ controld_start_timer(transition_timer);
+}
+
+/*!
+ * \internal
+ * \brief Start the countdown sequence for a shutdown
+ *
+ * \param[in] default_period_ms Period to use if the shutdown escalation
+ * timer's period is 0
+ */
+void
+controld_shutdown_start_countdown(guint default_period_ms)
+{
+ if (shutdown_escalation_timer->period_ms == 0) {
+ shutdown_escalation_timer->period_ms = default_period_ms;
+ }
+
+ crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums",
+ shutdown_escalation_timer->period_ms);
+ controld_start_timer(shutdown_escalation_timer);
+}
diff --git a/daemons/controld/controld_timers.h b/daemons/controld/controld_timers.h
new file mode 100644
index 0000000..587f4d1
--- /dev/null
+++ b/daemons/controld/controld_timers.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_TIMERS__H
+# define CONTROLD_TIMERS__H
+
+# include <stdbool.h> // bool
+# include <glib.h> // gboolean, gpointer, guint
+# include <controld_fsa.h> // crmd_fsa_input
+
+bool controld_init_fsa_timers(void);
+void controld_free_fsa_timers(void);
+void controld_configure_fsa_timers(GHashTable *options);
+
+bool controld_stop_recheck_timer(void);
+bool controld_stop_transition_timer(void);
+
+void controld_start_recheck_timer(void);
+void controld_start_transition_timer(void);
+void controld_start_wait_timer(void);
+
+bool controld_is_started_transition_timer(void);
+
+guint controld_get_period_transition_timer(void);
+
+void controld_reset_counter_election_timer(void);
+
+void controld_shutdown_start_countdown(guint default_period_ms);
+
+#endif
diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c
new file mode 100644
index 0000000..c8a342c
--- /dev/null
+++ b/daemons/controld/controld_transition.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+static void
+global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output)
+{
+}
+
+static pcmk__graph_t *
+create_blank_graph(void)
+{
+ pcmk__graph_t *a_graph = pcmk__unpack_graph(NULL, NULL);
+
+ a_graph->complete = true;
+ a_graph->abort_reason = "DC Takeover";
+ a_graph->completion_action = pcmk__graph_restart;
+ return a_graph;
+}
+
+/* A_TE_START, A_TE_STOP, O_TE_RESTART */
+void
+do_te_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+ gboolean init_ok = TRUE;
+
+ if (pcmk_is_set(action, A_TE_STOP)) {
+ pcmk__free_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph = NULL;
+
+ if (cib_conn != NULL) {
+ cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ te_update_diff);
+ }
+
+ controld_clear_fsa_input_flags(R_TE_CONNECTED);
+ crm_info("Transitioner is now inactive");
+ }
+
+ if ((action & A_TE_START) == 0) {
+ return;
+
+ } else if (pcmk_is_set(controld_globals.fsa_input_register,
+ R_TE_CONNECTED)) {
+ crm_debug("The transitioner is already active");
+ return;
+
+ } else if ((action & A_TE_START) && cur_state == S_STOPPING) {
+ crm_info("Ignoring request to start the transitioner while shutting down");
+ return;
+ }
+
+ if (controld_globals.te_uuid == NULL) {
+ controld_globals.te_uuid = crm_generate_uuid();
+ crm_info("Registering TE UUID: %s", controld_globals.te_uuid);
+ }
+
+ if (cib_conn == NULL) {
+ crm_err("Could not set CIB callbacks");
+ init_ok = FALSE;
+
+ } else {
+ if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY,
+ te_update_diff) != pcmk_ok) {
+ crm_err("Could not set CIB notification callback");
+ init_ok = FALSE;
+ }
+
+ if (cib_conn->cmds->set_op_callback(cib_conn,
+ global_cib_callback) != pcmk_ok) {
+ crm_err("Could not set CIB global callback");
+ init_ok = FALSE;
+ }
+ }
+
+ if (init_ok) {
+ controld_register_graph_functions();
+ pcmk__free_graph(controld_globals.transition_graph);
+
+ /* create a blank one */
+ crm_debug("Transitioner is now active");
+ controld_globals.transition_graph = create_blank_graph();
+ controld_set_fsa_input_flags(R_TE_CONNECTED);
+ }
+}
+
+/* A_TE_INVOKE, A_TE_CANCEL */
+void
+do_te_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+
+ if (!AM_I_DC
+ || ((controld_globals.fsa_state != S_TRANSITION_ENGINE)
+ && pcmk_is_set(action, A_TE_INVOKE))) {
+ crm_notice("No need to invoke the TE (%s) in state %s",
+ fsa_action2string(action),
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+ }
+
+ if (action & A_TE_CANCEL) {
+ crm_debug("Cancelling the transition: %sactive",
+ controld_globals.transition_graph->complete? "in" : "");
+ abort_transition(INFINITY, pcmk__graph_restart, "Peer Cancelled", NULL);
+ if (!controld_globals.transition_graph->complete) {
+ crmd_fsa_stall(FALSE);
+ }
+
+ } else if (action & A_TE_HALT) {
+ abort_transition(INFINITY, pcmk__graph_wait, "Peer Halt", NULL);
+ if (!controld_globals.transition_graph->complete) {
+ crmd_fsa_stall(FALSE);
+ }
+
+ } else if (action & A_TE_INVOKE) {
+ ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
+ xmlNode *graph_data = input->xml;
+ const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE);
+ const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH);
+ const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT);
+
+ if (graph_file == NULL && graph_data == NULL) {
+ crm_log_xml_err(input->msg, "Bad command");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ return;
+ }
+
+ if (!controld_globals.transition_graph->complete) {
+ crm_info("Another transition is already active");
+ abort_transition(INFINITY, pcmk__graph_restart, "Transition Active",
+ NULL);
+ return;
+ }
+
+ if ((controld_globals.fsa_pe_ref == NULL)
+ || !pcmk__str_eq(controld_globals.fsa_pe_ref, ref,
+ pcmk__str_none)) {
+ crm_info("Transition is redundant: %s expected but %s received",
+ pcmk__s(controld_globals.fsa_pe_ref, "no reference"),
+ pcmk__s(ref, "no reference"));
+ abort_transition(INFINITY, pcmk__graph_restart,
+ "Transition Redundant", NULL);
+ }
+
+ if (graph_data == NULL && graph_file != NULL) {
+ graph_data = filename2xml(graph_file);
+ }
+
+ if (controld_is_started_transition_timer()) {
+ crm_debug("The transitioner wait for a transition timer");
+ return;
+ }
+
+ CRM_CHECK(graph_data != NULL,
+ crm_err("Input raised by %s is invalid", msg_data->origin);
+ crm_log_xml_err(input->msg, "Bad command");
+ return);
+
+ pcmk__free_graph(controld_globals.transition_graph);
+ controld_globals.transition_graph = pcmk__unpack_graph(graph_data,
+ graph_input);
+ CRM_CHECK(controld_globals.transition_graph != NULL,
+ controld_globals.transition_graph = create_blank_graph();
+ return);
+ crm_info("Processing graph %d (ref=%s) derived from %s",
+ controld_globals.transition_graph->id, ref, graph_input);
+
+ te_reset_job_counts();
+
+ trigger_graph();
+ pcmk__log_graph(LOG_TRACE, controld_globals.transition_graph);
+
+ if (graph_data != input->xml) {
+ free_xml(graph_data);
+ }
+ }
+}
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
new file mode 100644
index 0000000..2da4221
--- /dev/null
+++ b/daemons/controld/controld_transition.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef TENGINE__H
+# define TENGINE__H
+
+# include <crm/common/mainloop.h>
+# include <crm/stonith-ng.h>
+# include <crm/services.h>
+# include <pacemaker-internal.h>
+
+/* tengine */
+pcmk__graph_action_t *match_down_event(const char *target);
+pcmk__graph_action_t *get_cancel_action(const char *id, const char *node);
+bool confirm_cancel_action(const char *id, const char *node_id);
+
+void controld_record_action_timeout(pcmk__graph_action_t *action);
+
+void controld_destroy_outside_events_table(void);
+void controld_remove_all_outside_events(void);
+
+gboolean fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node);
+void process_graph_event(xmlNode *event, const char *event_node);
+
+/* utils */
+pcmk__graph_action_t *controld_get_action(int id);
+gboolean stop_te_timer(pcmk__graph_action_t *action);
+const char *get_rsc_state(const char *task, enum pcmk_exec_status status);
+
+void process_te_message(xmlNode *msg, xmlNode *xml_data);
+
+void controld_register_graph_functions(void);
+
+void notify_crmd(pcmk__graph_t * graph);
+
+void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data);
+gboolean action_timer_callback(gpointer data);
+void te_update_diff(const char *event, xmlNode *msg);
+
+void controld_init_transition_trigger(void);
+void controld_destroy_transition_trigger(void);
+
+void controld_trigger_graph_as(const char *fn, int line);
+void abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
+ const char *abort_text, guint delay_ms);
+void abort_transition_graph(int abort_priority,
+ enum pcmk__graph_next abort_action,
+ const char *abort_text, const xmlNode *reason,
+ const char *fn, int line);
+
+# define trigger_graph() controld_trigger_graph_as(__func__, __LINE__)
+# define abort_transition(pri, action, text, reason) \
+ abort_transition_graph(pri, action, text, reason,__func__,__LINE__);
+
+void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph);
+void te_reset_job_counts(void);
+
+#endif
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
new file mode 100644
index 0000000..4ce09d9
--- /dev/null
+++ b/daemons/controld/controld_utils.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdlib.h>
+#include <stdint.h> // uint64_t
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+const char *
+fsa_input2string(enum crmd_fsa_input input)
+{
+ const char *inputAsText = NULL;
+
+ switch (input) {
+ case I_NULL:
+ inputAsText = "I_NULL";
+ break;
+ case I_CIB_OP:
+ inputAsText = "I_CIB_OP (unused)";
+ break;
+ case I_CIB_UPDATE:
+ inputAsText = "I_CIB_UPDATE";
+ break;
+ case I_DC_TIMEOUT:
+ inputAsText = "I_DC_TIMEOUT";
+ break;
+ case I_ELECTION:
+ inputAsText = "I_ELECTION";
+ break;
+ case I_PE_CALC:
+ inputAsText = "I_PE_CALC";
+ break;
+ case I_RELEASE_DC:
+ inputAsText = "I_RELEASE_DC";
+ break;
+ case I_ELECTION_DC:
+ inputAsText = "I_ELECTION_DC";
+ break;
+ case I_ERROR:
+ inputAsText = "I_ERROR";
+ break;
+ case I_FAIL:
+ inputAsText = "I_FAIL";
+ break;
+ case I_INTEGRATED:
+ inputAsText = "I_INTEGRATED";
+ break;
+ case I_FINALIZED:
+ inputAsText = "I_FINALIZED";
+ break;
+ case I_NODE_JOIN:
+ inputAsText = "I_NODE_JOIN";
+ break;
+ case I_JOIN_OFFER:
+ inputAsText = "I_JOIN_OFFER";
+ break;
+ case I_JOIN_REQUEST:
+ inputAsText = "I_JOIN_REQUEST";
+ break;
+ case I_JOIN_RESULT:
+ inputAsText = "I_JOIN_RESULT";
+ break;
+ case I_NOT_DC:
+ inputAsText = "I_NOT_DC";
+ break;
+ case I_RECOVERED:
+ inputAsText = "I_RECOVERED";
+ break;
+ case I_RELEASE_FAIL:
+ inputAsText = "I_RELEASE_FAIL";
+ break;
+ case I_RELEASE_SUCCESS:
+ inputAsText = "I_RELEASE_SUCCESS";
+ break;
+ case I_RESTART:
+ inputAsText = "I_RESTART";
+ break;
+ case I_PE_SUCCESS:
+ inputAsText = "I_PE_SUCCESS";
+ break;
+ case I_ROUTER:
+ inputAsText = "I_ROUTER";
+ break;
+ case I_SHUTDOWN:
+ inputAsText = "I_SHUTDOWN";
+ break;
+ case I_STARTUP:
+ inputAsText = "I_STARTUP";
+ break;
+ case I_TE_SUCCESS:
+ inputAsText = "I_TE_SUCCESS";
+ break;
+ case I_STOP:
+ inputAsText = "I_STOP";
+ break;
+ case I_DC_HEARTBEAT:
+ inputAsText = "I_DC_HEARTBEAT";
+ break;
+ case I_WAIT_FOR_EVENT:
+ inputAsText = "I_WAIT_FOR_EVENT";
+ break;
+ case I_LRM_EVENT:
+ inputAsText = "I_LRM_EVENT";
+ break;
+ case I_PENDING:
+ inputAsText = "I_PENDING";
+ break;
+ case I_HALT:
+ inputAsText = "I_HALT";
+ break;
+ case I_TERMINATE:
+ inputAsText = "I_TERMINATE";
+ break;
+ case I_ILLEGAL:
+ inputAsText = "I_ILLEGAL";
+ break;
+ }
+
+ if (inputAsText == NULL) {
+ crm_err("Input %d is unknown", input);
+ inputAsText = "<UNKNOWN_INPUT>";
+ }
+
+ return inputAsText;
+}
+
+const char *
+fsa_state2string(enum crmd_fsa_state state)
+{
+ const char *stateAsText = NULL;
+
+ switch (state) {
+ case S_IDLE:
+ stateAsText = "S_IDLE";
+ break;
+ case S_ELECTION:
+ stateAsText = "S_ELECTION";
+ break;
+ case S_INTEGRATION:
+ stateAsText = "S_INTEGRATION";
+ break;
+ case S_FINALIZE_JOIN:
+ stateAsText = "S_FINALIZE_JOIN";
+ break;
+ case S_NOT_DC:
+ stateAsText = "S_NOT_DC";
+ break;
+ case S_POLICY_ENGINE:
+ stateAsText = "S_POLICY_ENGINE";
+ break;
+ case S_RECOVERY:
+ stateAsText = "S_RECOVERY";
+ break;
+ case S_RELEASE_DC:
+ stateAsText = "S_RELEASE_DC";
+ break;
+ case S_PENDING:
+ stateAsText = "S_PENDING";
+ break;
+ case S_STOPPING:
+ stateAsText = "S_STOPPING";
+ break;
+ case S_TERMINATE:
+ stateAsText = "S_TERMINATE";
+ break;
+ case S_TRANSITION_ENGINE:
+ stateAsText = "S_TRANSITION_ENGINE";
+ break;
+ case S_STARTING:
+ stateAsText = "S_STARTING";
+ break;
+ case S_HALT:
+ stateAsText = "S_HALT";
+ break;
+ case S_ILLEGAL:
+ stateAsText = "S_ILLEGAL";
+ break;
+ }
+
+ if (stateAsText == NULL) {
+ crm_err("State %d is unknown", state);
+ stateAsText = "<UNKNOWN_STATE>";
+ }
+
+ return stateAsText;
+}
+
+const char *
+fsa_cause2string(enum crmd_fsa_cause cause)
+{
+ const char *causeAsText = NULL;
+
+ switch (cause) {
+ case C_UNKNOWN:
+ causeAsText = "C_UNKNOWN";
+ break;
+ case C_STARTUP:
+ causeAsText = "C_STARTUP";
+ break;
+ case C_IPC_MESSAGE:
+ causeAsText = "C_IPC_MESSAGE";
+ break;
+ case C_HA_MESSAGE:
+ causeAsText = "C_HA_MESSAGE";
+ break;
+ case C_TIMER_POPPED:
+ causeAsText = "C_TIMER_POPPED";
+ break;
+ case C_SHUTDOWN:
+ causeAsText = "C_SHUTDOWN";
+ break;
+ case C_LRM_OP_CALLBACK:
+ causeAsText = "C_LRM_OP_CALLBACK";
+ break;
+ case C_CRMD_STATUS_CALLBACK:
+ causeAsText = "C_CRMD_STATUS_CALLBACK";
+ break;
+ case C_FSA_INTERNAL:
+ causeAsText = "C_FSA_INTERNAL";
+ break;
+ }
+
+ if (causeAsText == NULL) {
+ crm_err("Cause %d is unknown", cause);
+ causeAsText = "<UNKNOWN_CAUSE>";
+ }
+
+ return causeAsText;
+}
+
+const char *
+fsa_action2string(long long action)
+{
+ const char *actionAsText = NULL;
+
+ switch (action) {
+
+ case A_NOTHING:
+ actionAsText = "A_NOTHING";
+ break;
+ case A_ELECTION_START:
+ actionAsText = "A_ELECTION_START";
+ break;
+ case A_DC_JOIN_FINAL:
+ actionAsText = "A_DC_JOIN_FINAL";
+ break;
+ case A_READCONFIG:
+ actionAsText = "A_READCONFIG";
+ break;
+ case O_RELEASE:
+ actionAsText = "O_RELEASE";
+ break;
+ case A_STARTUP:
+ actionAsText = "A_STARTUP";
+ break;
+ case A_STARTED:
+ actionAsText = "A_STARTED";
+ break;
+ case A_HA_CONNECT:
+ actionAsText = "A_HA_CONNECT";
+ break;
+ case A_HA_DISCONNECT:
+ actionAsText = "A_HA_DISCONNECT";
+ break;
+ case A_LRM_CONNECT:
+ actionAsText = "A_LRM_CONNECT";
+ break;
+ case A_LRM_EVENT:
+ actionAsText = "A_LRM_EVENT";
+ break;
+ case A_LRM_INVOKE:
+ actionAsText = "A_LRM_INVOKE";
+ break;
+ case A_LRM_DISCONNECT:
+ actionAsText = "A_LRM_DISCONNECT";
+ break;
+ case O_LRM_RECONNECT:
+ actionAsText = "O_LRM_RECONNECT";
+ break;
+ case A_CL_JOIN_QUERY:
+ actionAsText = "A_CL_JOIN_QUERY";
+ break;
+ case A_DC_TIMER_STOP:
+ actionAsText = "A_DC_TIMER_STOP";
+ break;
+ case A_DC_TIMER_START:
+ actionAsText = "A_DC_TIMER_START";
+ break;
+ case A_INTEGRATE_TIMER_START:
+ actionAsText = "A_INTEGRATE_TIMER_START";
+ break;
+ case A_INTEGRATE_TIMER_STOP:
+ actionAsText = "A_INTEGRATE_TIMER_STOP";
+ break;
+ case A_FINALIZE_TIMER_START:
+ actionAsText = "A_FINALIZE_TIMER_START";
+ break;
+ case A_FINALIZE_TIMER_STOP:
+ actionAsText = "A_FINALIZE_TIMER_STOP";
+ break;
+ case A_ELECTION_COUNT:
+ actionAsText = "A_ELECTION_COUNT";
+ break;
+ case A_ELECTION_VOTE:
+ actionAsText = "A_ELECTION_VOTE";
+ break;
+ case A_ELECTION_CHECK:
+ actionAsText = "A_ELECTION_CHECK";
+ break;
+ case A_CL_JOIN_ANNOUNCE:
+ actionAsText = "A_CL_JOIN_ANNOUNCE";
+ break;
+ case A_CL_JOIN_REQUEST:
+ actionAsText = "A_CL_JOIN_REQUEST";
+ break;
+ case A_CL_JOIN_RESULT:
+ actionAsText = "A_CL_JOIN_RESULT";
+ break;
+ case A_DC_JOIN_OFFER_ALL:
+ actionAsText = "A_DC_JOIN_OFFER_ALL";
+ break;
+ case A_DC_JOIN_OFFER_ONE:
+ actionAsText = "A_DC_JOIN_OFFER_ONE";
+ break;
+ case A_DC_JOIN_PROCESS_REQ:
+ actionAsText = "A_DC_JOIN_PROCESS_REQ";
+ break;
+ case A_DC_JOIN_PROCESS_ACK:
+ actionAsText = "A_DC_JOIN_PROCESS_ACK";
+ break;
+ case A_DC_JOIN_FINALIZE:
+ actionAsText = "A_DC_JOIN_FINALIZE";
+ break;
+ case A_MSG_PROCESS:
+ actionAsText = "A_MSG_PROCESS";
+ break;
+ case A_MSG_ROUTE:
+ actionAsText = "A_MSG_ROUTE";
+ break;
+ case A_RECOVER:
+ actionAsText = "A_RECOVER";
+ break;
+ case A_DC_RELEASE:
+ actionAsText = "A_DC_RELEASE";
+ break;
+ case A_DC_RELEASED:
+ actionAsText = "A_DC_RELEASED";
+ break;
+ case A_DC_TAKEOVER:
+ actionAsText = "A_DC_TAKEOVER";
+ break;
+ case A_SHUTDOWN:
+ actionAsText = "A_SHUTDOWN";
+ break;
+ case A_SHUTDOWN_REQ:
+ actionAsText = "A_SHUTDOWN_REQ";
+ break;
+ case A_STOP:
+ actionAsText = "A_STOP ";
+ break;
+ case A_EXIT_0:
+ actionAsText = "A_EXIT_0";
+ break;
+ case A_EXIT_1:
+ actionAsText = "A_EXIT_1";
+ break;
+ case O_CIB_RESTART:
+ actionAsText = "O_CIB_RESTART";
+ break;
+ case A_CIB_START:
+ actionAsText = "A_CIB_START";
+ break;
+ case A_CIB_STOP:
+ actionAsText = "A_CIB_STOP";
+ break;
+ case A_TE_INVOKE:
+ actionAsText = "A_TE_INVOKE";
+ break;
+ case O_TE_RESTART:
+ actionAsText = "O_TE_RESTART";
+ break;
+ case A_TE_START:
+ actionAsText = "A_TE_START";
+ break;
+ case A_TE_STOP:
+ actionAsText = "A_TE_STOP";
+ break;
+ case A_TE_HALT:
+ actionAsText = "A_TE_HALT";
+ break;
+ case A_TE_CANCEL:
+ actionAsText = "A_TE_CANCEL";
+ break;
+ case A_PE_INVOKE:
+ actionAsText = "A_PE_INVOKE";
+ break;
+ case O_PE_RESTART:
+ actionAsText = "O_PE_RESTART";
+ break;
+ case A_PE_START:
+ actionAsText = "A_PE_START";
+ break;
+ case A_PE_STOP:
+ actionAsText = "A_PE_STOP";
+ break;
+ case A_NODE_BLOCK:
+ actionAsText = "A_NODE_BLOCK";
+ break;
+ case A_UPDATE_NODESTATUS:
+ actionAsText = "A_UPDATE_NODESTATUS";
+ break;
+ case A_LOG:
+ actionAsText = "A_LOG ";
+ break;
+ case A_ERROR:
+ actionAsText = "A_ERROR ";
+ break;
+ case A_WARN:
+ actionAsText = "A_WARN ";
+ break;
+ /* Composite actions */
+ case A_DC_TIMER_START | A_CL_JOIN_QUERY:
+ actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY";
+ break;
+ }
+
+ if (actionAsText == NULL) {
+ crm_err("Action %.16llx is unknown", action);
+ actionAsText = "<UNKNOWN_ACTION>";
+ }
+
+ return actionAsText;
+}
+
+void
+fsa_dump_inputs(int log_level, const char *text, long long input_register)
+{
+ if (input_register == A_NOTHING) {
+ return;
+ }
+ if (text == NULL) {
+ text = "Input register contents:";
+ }
+
+ if (pcmk_is_set(input_register, R_THE_DC)) {
+ crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC);
+ }
+ if (pcmk_is_set(input_register, R_STARTING)) {
+ crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING);
+ }
+ if (pcmk_is_set(input_register, R_SHUTDOWN)) {
+ crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN);
+ }
+ if (pcmk_is_set(input_register, R_STAYDOWN)) {
+ crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN);
+ }
+ if (pcmk_is_set(input_register, R_JOIN_OK)) {
+ crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK);
+ }
+ if (pcmk_is_set(input_register, R_READ_CONFIG)) {
+ crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG);
+ }
+ if (pcmk_is_set(input_register, R_INVOKE_PE)) {
+ crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE);
+ }
+ if (pcmk_is_set(input_register, R_CIB_CONNECTED)) {
+ crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_PE_CONNECTED)) {
+ crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_TE_CONNECTED)) {
+ crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_LRM_CONNECTED)) {
+ crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED);
+ }
+ if (pcmk_is_set(input_register, R_CIB_REQUIRED)) {
+ crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_PE_REQUIRED)) {
+ crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_TE_REQUIRED)) {
+ crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED);
+ }
+ if (pcmk_is_set(input_register, R_REQ_PEND)) {
+ crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND);
+ }
+ if (pcmk_is_set(input_register, R_PE_PEND)) {
+ crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND);
+ }
+ if (pcmk_is_set(input_register, R_TE_PEND)) {
+ crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND);
+ }
+ if (pcmk_is_set(input_register, R_RESP_PEND)) {
+ crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND);
+ }
+ if (pcmk_is_set(input_register, R_CIB_DONE)) {
+ crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE);
+ }
+ if (pcmk_is_set(input_register, R_HAVE_CIB)) {
+ crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB);
+ }
+ if (pcmk_is_set(input_register, R_MEMBERSHIP)) {
+ crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP);
+ }
+ if (pcmk_is_set(input_register, R_PEER_DATA)) {
+ crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA);
+ }
+ if (pcmk_is_set(input_register, R_IN_RECOVERY)) {
+ crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY);
+ }
+}
+
+void
+fsa_dump_actions(uint64_t action, const char *text)
+{
+ if (pcmk_is_set(action, A_READCONFIG)) {
+ crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text);
+ }
+ if (pcmk_is_set(action, A_STARTUP)) {
+ crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text);
+ }
+ if (pcmk_is_set(action, A_STARTED)) {
+ crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text);
+ }
+ if (pcmk_is_set(action, A_HA_CONNECT)) {
+ crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text);
+ }
+ if (pcmk_is_set(action, A_HA_DISCONNECT)) {
+ crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text);
+ }
+ if (pcmk_is_set(action, A_LRM_CONNECT)) {
+ crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text);
+ }
+ if (pcmk_is_set(action, A_LRM_EVENT)) {
+ crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text);
+ }
+ if (pcmk_is_set(action, A_LRM_INVOKE)) {
+ crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_LRM_DISCONNECT)) {
+ crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text);
+ }
+ if (pcmk_is_set(action, A_DC_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_DC_TIMER_START)) {
+ crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) {
+ crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) {
+ crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text);
+ }
+ if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) {
+ crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_COUNT)) {
+ crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_VOTE)) {
+ crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text);
+ }
+ if (pcmk_is_set(action, A_ELECTION_CHECK)) {
+ crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text);
+ }
+ if (pcmk_is_set(action, A_CL_JOIN_RESULT)) {
+ crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text);
+ }
+ if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) {
+ crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text);
+ }
+ if (pcmk_is_set(action, A_MSG_PROCESS)) {
+ crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text);
+ }
+ if (pcmk_is_set(action, A_MSG_ROUTE)) {
+ crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text);
+ }
+ if (pcmk_is_set(action, A_RECOVER)) {
+ crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text);
+ }
+ if (pcmk_is_set(action, A_DC_RELEASE)) {
+ crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text);
+ }
+ if (pcmk_is_set(action, A_DC_RELEASED)) {
+ crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text);
+ }
+ if (pcmk_is_set(action, A_DC_TAKEOVER)) {
+ crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text);
+ }
+ if (pcmk_is_set(action, A_SHUTDOWN)) {
+ crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text);
+ }
+ if (pcmk_is_set(action, A_SHUTDOWN_REQ)) {
+ crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text);
+ }
+ if (pcmk_is_set(action, A_STOP)) {
+ crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text);
+ }
+ if (pcmk_is_set(action, A_EXIT_0)) {
+ crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text);
+ }
+ if (pcmk_is_set(action, A_EXIT_1)) {
+ crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text);
+ }
+ if (pcmk_is_set(action, A_CIB_START)) {
+ crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text);
+ }
+ if (pcmk_is_set(action, A_CIB_STOP)) {
+ crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text);
+ }
+ if (pcmk_is_set(action, A_TE_INVOKE)) {
+ crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_TE_START)) {
+ crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text);
+ }
+ if (pcmk_is_set(action, A_TE_STOP)) {
+ crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text);
+ }
+ if (pcmk_is_set(action, A_TE_CANCEL)) {
+ crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text);
+ }
+ if (pcmk_is_set(action, A_PE_INVOKE)) {
+ crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text);
+ }
+ if (pcmk_is_set(action, A_PE_START)) {
+ crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text);
+ }
+ if (pcmk_is_set(action, A_PE_STOP)) {
+ crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text);
+ }
+ if (pcmk_is_set(action, A_NODE_BLOCK)) {
+ crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text);
+ }
+ if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) {
+ crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text);
+ }
+ if (pcmk_is_set(action, A_LOG)) {
+ crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text);
+ }
+ if (pcmk_is_set(action, A_ERROR)) {
+ crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text);
+ }
+ if (pcmk_is_set(action, A_WARN)) {
+ crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text);
+ }
+}
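+
+/* The flag-by-flag checks above could be collapsed into a table-driven loop;
+ * a minimal sketch (the table name and entries here are illustrative, not
+ * part of this change):
+ *
+ *     static const struct {
+ *         uint64_t flag;
+ *         const char *name;
+ *     } action_names[] = {
+ *         { A_LRM_CONNECT, "A_LRM_CONNECT" },
+ *         // ... one entry per action flag ...
+ *     };
+ *
+ *     const size_t n = sizeof(action_names) / sizeof(action_names[0]);
+ *
+ *     for (size_t i = 0; i < n; ++i) {
+ *         if (pcmk_is_set(action, action_names[i].flag)) {
+ *             crm_trace("Action %.16llx (%s) %s",
+ *                       action_names[i].flag, action_names[i].name, text);
+ *         }
+ *     }
+ */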
+
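+/*!
+ * \internal
+ * \brief Update the controller's record of the current DC
+ *
+ * If \p msg is NULL, unset any currently recorded DC. Otherwise, validate the
+ * message's origin before recording it as the new DC: a node that is itself
+ * DC rejects announcements from other nodes, and a node that already knows
+ * the DC rejects announcements from anyone else.
+ *
+ * \param[in] msg  Message to take the DC name and version from (may be NULL)
+ *
+ * \return TRUE if the DC was updated (or unset), or FALSE on rejection
+ */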
+gboolean
+update_dc(xmlNode *msg)
+{
+ char *last_dc = controld_globals.dc_name;
+ const char *dc_version = NULL;
+ const char *welcome_from = NULL;
+
+ if (msg != NULL) {
+ gboolean invalid = FALSE;
+
+ dc_version = crm_element_value(msg, F_CRM_VERSION);
+ welcome_from = crm_element_value(msg, F_CRM_HOST_FROM);
+
+ CRM_CHECK(dc_version != NULL, return FALSE);
+ CRM_CHECK(welcome_from != NULL, return FALSE);
+
+ if (AM_I_DC
+ && !pcmk__str_eq(welcome_from, controld_globals.our_nodename,
+ pcmk__str_casei)) {
+ invalid = TRUE;
+
+ } else if ((controld_globals.dc_name != NULL)
+ && !pcmk__str_eq(welcome_from, controld_globals.dc_name,
+ pcmk__str_casei)) {
+ invalid = TRUE;
+ }
+
+ if (invalid) {
+ if (AM_I_DC) {
+ crm_err("Not updating DC to %s (%s): we are also a DC",
+ welcome_from, dc_version);
+ } else {
+ crm_warn("New DC %s is not %s",
+ welcome_from, controld_globals.dc_name);
+ }
+
+ controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START);
+ controld_trigger_fsa();
+ return FALSE;
+ }
+ }
+
+ controld_globals.dc_name = NULL; // old value still owned by (and freed via) last_dc
+ pcmk__str_update(&(controld_globals.dc_name), welcome_from);
+ pcmk__str_update(&(controld_globals.dc_version), dc_version);
+
+ if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) {
+ /* do nothing */
+
+ } else if (controld_globals.dc_name != NULL) {
+ crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name);
+
+ crm_info("Set DC to %s (%s)",
+ controld_globals.dc_name,
+ pcmk__s(controld_globals.dc_version, "unknown version"));
+ pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER);
+
+ } else if (last_dc != NULL) {
+ crm_info("Unset DC (was %s)", last_dc);
+ }
+
+ free(last_dc);
+ return TRUE;
+}
+
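+/*!
+ * \internal
+ * \brief Record that a cluster peer has gone down
+ *
+ * \param[in,out] peer  Peer to mark as down
+ * \param[in]     full  If true and the peer has no recorded state yet, also
+ *                      mark it as lost and clear its process flags
+ */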
+void
+crmd_peer_down(crm_node_t *peer, bool full)
+{
+ if (full && (peer->state == NULL)) {
+ pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0);
+ crm_update_peer_proc(__func__, peer, crm_proc_none, NULL);
+ }
+ crm_update_peer_join(__func__, peer, crm_join_none);
+ pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
+}
+
+/*!
+ * \internal
+ * \brief Check feature set compatibility of DC and joining node
+ *
+ * Return true if a joining node's CRM feature set is compatible with the
+ * current DC's. The feature sets are compatible if they have the same major
+ * version number, and the DC's minor version number is the same or older than
+ * the joining node's. The minor-minor version is intended solely to allow
+ * resource agents to detect feature support, and so is ignored.
+ *
+ * \param[in] dc_version    DC's feature set
+ * \param[in] join_version  Joining node's feature set
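+ *
+ * For example, a DC at feature set "3.16.2" is compatible with a joining
+ * node at "3.17.0" (same major version 3, and DC minor 16 <= 17), while a
+ * DC at "3.17.0" is not compatible with a joining node at "3.16.2".
+ *
+ * \return true if the feature sets are compatible, or false otherwise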
+ */
+bool
+feature_set_compatible(const char *dc_version, const char *join_version)
+{
+ char *dc_minor = NULL;
+ char *join_minor = NULL;
+ long dc_v = 0;
+ long join_v = 0;
+
+ // Get DC's major version
+ errno = 0;
+ dc_v = strtol(dc_version, &dc_minor, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Get joining node's major version
+ errno = 0;
+ join_v = strtol(join_version, &join_minor, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Major version component must be identical
+ if (dc_v != join_v) {
+ return FALSE;
+ }
+
+ // Get DC's minor version
+ if (*dc_minor == '.') {
+ ++dc_minor;
+ }
+ errno = 0;
+ dc_v = strtol(dc_minor, NULL, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // Get joining node's minor version
+ if (*join_minor == '.') {
+ ++join_minor;
+ }
+ errno = 0;
+ join_v = strtol(join_minor, NULL, 10);
+ if (errno) {
+ return FALSE;
+ }
+
+ // DC's minor version must be the same or older
+ return dc_v <= join_v;
+}
+
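+/*!
+ * \internal
+ * \brief Get the node ID from the node_state entry enclosing an operation
+ *
+ * Walk up from a resource operation history entry to its node_state ancestor
+ * and return that element's ID attribute.
+ *
+ * \param[in] lrm_rsc_op  XML of a resource operation history entry
+ *
+ * \return ID of the enclosing node_state element, or NULL if none is found
+ */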
+const char *
+get_node_id(xmlNode *lrm_rsc_op)
+{
+ xmlNode *node = lrm_rsc_op;
+
+ while (node != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(node), pcmk__str_casei)) {
+ node = node->parent;
+ }
+
+ CRM_CHECK(node != NULL, return NULL);
+ return ID(node);
+}
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
new file mode 100644
index 0000000..6ce413d
--- /dev/null
+++ b/daemons/controld/controld_utils.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD_UTILS__H
+# define CRMD_UTILS__H
+
+# include <crm/crm.h>
+# include <crm/common/xml.h>
+
+# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+
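+// Bit flags for node status update functions such as populate_cib_nodes()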
+enum node_update_flags {
+ node_update_none = 0x0000,
+ node_update_quick = 0x0001,
+ node_update_cluster = 0x0010,
+ node_update_peer = 0x0020,
+ node_update_join = 0x0040,
+ node_update_expected = 0x0100,
+ node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected,
+};
+
+crm_exit_t crmd_exit(crm_exit_t exit_code);
+_Noreturn void crmd_fast_exit(crm_exit_t exit_code);
+void controld_shutdown_schedulerd_ipc(void);
+void controld_stop_sched_timer(void);
+void controld_free_sched_timer(void);
+void controld_expect_sched_reply(char *ref);
+
+void fsa_dump_actions(uint64_t action, const char *text);
+void fsa_dump_inputs(int log_level, const char *text, long long input_register);
+
+gboolean update_dc(xmlNode *msg);
+void crm_update_peer_join(const char *source, crm_node_t *node,
+                          enum crm_join_phase phase);
+xmlNode *create_node_state_update(crm_node_t *node, int flags,
+ xmlNode *parent, const char *source);
+void populate_cib_nodes(enum node_update_flags flags, const char *source);
+void crm_update_quorum(gboolean quorum, gboolean force_update);
+void controld_close_attrd_ipc(void);
+void update_attrd(const char *host, const char *name, const char *value,
+                  const char *user_name, gboolean is_remote_node);
+void update_attrd_list(GList *attrs, uint32_t opts);
+void update_attrd_remote_node_removed(const char *host, const char *user_name);
+void update_attrd_clear_failures(const char *host, const char *rsc,
+ const char *op, const char *interval_spec,
+ gboolean is_remote_node);
+
+int crmd_join_phase_count(enum crm_join_phase phase);
+void crmd_join_phase_log(int level);
+
+void crmd_peer_down(crm_node_t *peer, bool full);
+
+bool feature_set_compatible(const char *dc_version, const char *join_version);
+
+const char *get_node_id(xmlNode *lrm_rsc_op);
+
+#endif
diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c
new file mode 100644
index 0000000..5858898
--- /dev/null
+++ b/daemons/controld/pacemaker-controld.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <crm/crm.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \
+ "to events"
+
+_Noreturn void crmd_init(void);
+extern void init_dotfile(void);
+
+controld_globals_t controld_globals = {
+ // Automatic initialization to 0, false, or NULL is fine for most members
+ .fsa_state = S_STARTING,
+ .fsa_actions = A_NOTHING,
+};
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ return pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_exit_t exit_code = CRM_EX_OK;
+ bool initialize = true;
+
+ crm_ipc_t *old_instance = NULL;
+
+ pcmk__output_t *out = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ initialize = false;
+ goto done;
+ }
+
+ if ((g_strv_length(processed_args) >= 2)
+ && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
+ crmd_metadata();
+ initialize = false;
+ goto done;
+ }
+
+ pcmk__cli_init_logging("pacemaker-controld", args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ crm_notice("Starting Pacemaker controller");
+
+ old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0);
+ if (old_instance == NULL) {
+ /* crm_ipc_new will have already printed an error message with crm_err. */
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC endpoint already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("pacemaker-controld is already active, aborting startup");
+ initialize = false;
+ goto done;
+
+ } else {
+ /* not up or not authentic; we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
+ if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Bad permissions on " PE_STATE_DIR
+ " (see logs for details)");
+ goto done;
+
+ } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Bad permissions on " CRM_CONFIG_DIR
+ " (see logs for details)");
+ goto done;
+ }
+
+ if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+
+ if ((exit_code == CRM_EX_OK) && initialize) {
+ // Does not return
+ crmd_init();
+ }
+ crm_exit(exit_code);
+}
+
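+/*!
+ * \internal
+ * \brief Feed the startup input to the FSA and run the main loop
+ *
+ * Register I_STARTUP with the finite state machine and, if the resulting
+ * state is S_PENDING or S_STARTING, run the GLib main loop until shutdown.
+ * This function does not return; it exits via crmd_fast_exit().
+ */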
+void
+crmd_init(void)
+{
+ crm_exit_t exit_code = CRM_EX_OK;
+ enum crmd_fsa_state state;
+
+ init_dotfile();
+ register_fsa_input(C_STARTUP, I_STARTUP, NULL);
+
+ crm_peer_init();
+ state = s_crmd_fsa(C_STARTUP);
+
+ if (state == S_PENDING || state == S_STARTING) {
+ /* Create the mainloop and run it... */
+ crm_trace("Starting %s's mainloop", crm_system_name);
+ controld_globals.mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(controld_globals.mainloop);
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
+ crm_info("Inhibiting automated respawn");
+ exit_code = CRM_EX_FATAL;
+ }
+
+ } else {
+ crm_err("Startup of %s failed. Current state: %s",
+ crm_system_name, fsa_state2string(state));
+ exit_code = CRM_EX_ERROR;
+ }
+
+ crm_info("%s[%lu] exiting with status %d (%s)",
+ crm_system_name, (unsigned long) getpid(), exit_code,
+ crm_exit_str(exit_code));
+
+ crmd_fast_exit(exit_code);
+}
diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h
new file mode 100644
index 0000000..1484a00
--- /dev/null
+++ b/daemons/controld/pacemaker-controld.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CRMD__H
+# define CRMD__H
+
+#include <controld_alerts.h>
+#include <controld_callbacks.h>
+#include <controld_cib.h>
+#include <controld_fencing.h>
+#include <controld_fsa.h>
+#include <controld_globals.h>
+#include <controld_timers.h>
+#include <controld_lrm.h>
+#include <controld_membership.h>
+#include <controld_messages.h>
+#include <controld_metadata.h>
+#include <controld_throttle.h>
+#include <controld_transition.h>
+#include <controld_utils.h>
+
+# define controld_trigger_config() \
+ controld_trigger_config_as(__func__, __LINE__)
+
+void crmd_metadata(void);
+void controld_trigger_config_as(const char *fn, int line);
+void controld_election_init(const char *uname);
+void controld_configure_election(GHashTable *options);
+void controld_remove_voter(const char *uname);
+void controld_election_fini(void);
+void controld_stop_current_election_timeout(void);
+
+#endif