Diffstat (limited to 'daemons/controld')
44 files changed, 21264 insertions, 0 deletions
diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am new file mode 100644 index 0000000..08be1ff --- /dev/null +++ b/daemons/controld/Makefile.am @@ -0,0 +1,87 @@ +# +# Copyright 2018-2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +halibdir = $(CRM_DAEMON_DIR) + +halib_PROGRAMS = pacemaker-controld + +noinst_HEADERS = controld_alerts.h \ + controld_callbacks.h \ + controld_cib.h \ + controld_fencing.h \ + controld_fsa.h \ + controld_globals.h \ + controld_lrm.h \ + controld_membership.h \ + controld_messages.h \ + controld_metadata.h \ + controld_throttle.h \ + controld_timers.h \ + controld_transition.h \ + controld_utils.h \ + pacemaker-controld.h + +pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \ + $(top_builddir)/lib/pacemaker/libpacemaker.la \ + $(top_builddir)/lib/pengine/libpe_rules.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(CLUSTERLIBS) + +pacemaker_controld_SOURCES = pacemaker-controld.c \ + controld_alerts.c \ + controld_attrd.c \ + controld_callbacks.c \ + controld_cib.c \ + controld_control.c \ + controld_corosync.c \ + controld_election.c \ + controld_execd.c \ + controld_execd_state.c \ + controld_fencing.c \ + controld_fsa.c \ + controld_join_client.c \ + controld_join_dc.c \ + controld_matrix.c \ + controld_membership.c \ + controld_messages.c \ + controld_metadata.c \ + controld_remote_ra.c \ + controld_schedulerd.c \ + controld_te_actions.c \ + controld_te_callbacks.c \ + controld_te_events.c \ + controld_te_utils.c \ + controld_throttle.c \ + controld_timers.c \ + controld_transition.c \ + controld_utils.c + +if BUILD_XML_HELP +man7_MANS = pacemaker-controld.7 +endif + +CLEANFILES = $(man7_MANS) + +if BUILD_LEGACY_LINKS +install-exec-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd && $(LN_S) pacemaker-controld crmd + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd +endif diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c new file mode 100644 index 0000000..27a5ce2 --- /dev/null +++ b/daemons/controld/controld_alerts.c @@ -0,0 +1,88 @@ +/* + * Copyright 2012-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <glib.h> +#include <libxml/tree.h> + +#include <crm/fencing/internal.h> +#include <crm/lrmd.h> +#include <crm/lrmd_internal.h> +#include <crm/pengine/rules_internal.h> +#include <crm/pengine/status.h> +#include <crm/stonith-ng.h> + +#include <pacemaker-controld.h> + +static GList *crmd_alert_list = NULL; + +void +crmd_unpack_alerts(xmlNode *alerts) +{ + pe_free_alert_list(crmd_alert_list); + crmd_alert_list = pe_unpack_alerts(alerts); +} + +void +crmd_alert_node_event(crm_node_t *node) +{ + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, + node->uname, node->id, node->state); +} + +void +crmd_alert_fencing_op(stonith_event_t * e) +{ + char *desc; + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + desc = stonith__event_description(e); + lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, + e->target, e->operation, desc, e->result); + free(desc); +} + +void +crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) +{ + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node, + op); +} diff --git a/daemons/controld/controld_alerts.h b/daemons/controld/controld_alerts.h new file mode 100644 index 0000000..ec5852a --- /dev/null +++ b/daemons/controld/controld_alerts.h @@ -0,0 +1,22 @@ +/* + * Copyright 2015-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_ALERTS__H +# define CONTROLD_ALERTS__H + +# include <crm/crm.h> +# include <crm/cluster.h> +# include <crm/stonith-ng.h> + +void crmd_unpack_alerts(xmlNode *alerts); +void crmd_alert_node_event(crm_node_t *node); +void crmd_alert_fencing_op(stonith_event_t *e); +void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op); + +#endif diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c new file mode 100644 index 0000000..923abb9 --- /dev/null +++ b/daemons/controld/controld_attrd.c @@ -0,0 +1,160 @@ +/* + * Copyright 2006-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/common/attrd_internal.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_attrd_internal.h> +#include <crm/msg_xml.h> + +#include <pacemaker-controld.h> + +static pcmk_ipc_api_t *attrd_api = NULL; + +void +controld_close_attrd_ipc(void) +{ + if (attrd_api != NULL) { + crm_trace("Closing connection to pacemaker-attrd"); + pcmk_disconnect_ipc(attrd_api); + pcmk_free_ipc_api(attrd_api); + attrd_api = NULL; + } +} + +static inline const char * +node_type(bool is_remote) +{ + return is_remote? 
"Pacemaker Remote" : "cluster"; +} + +static inline const char * +when(void) +{ + return pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN)? " at shutdown" : ""; +} + +static void +handle_attr_error(void) +{ + if (AM_I_DC) { + /* We are unable to provide accurate information to the + * scheduler, so allow another node to take over DC. + * @TODO Should we do this unconditionally on any failure? + */ + crmd_exit(CRM_EX_FATAL); + + } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + // Fast-track shutdown since unable to request via attribute + register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL); + } +} + +void +update_attrd(const char *host, const char *name, const char *value, + const char *user_name, gboolean is_remote_node) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + uint32_t attrd_opts = pcmk__node_attr_value; + + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } + rc = pcmk__attrd_api_update(attrd_api, host, name, value, + NULL, NULL, user_name, attrd_opts); + } + if (rc != pcmk_rc_ok) { + do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, + "Could not update attribute %s=%s for %s node %s%s: %s " + CRM_XS " rc=%d", name, value, node_type(is_remote_node), + host, when(), pcmk_rc_str(rc), rc); + handle_attr_error(); + } +} + +void +update_attrd_list(GList *attrs, uint32_t opts) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL, + opts | pcmk__node_attr_value); + } + if (rc != pcmk_rc_ok) { + do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, + "Could not update multiple node attributes: %s " + CRM_XS " rc=%d", pcmk_rc_str(rc), rc); + handle_attr_error(); + } +} + +void +update_attrd_remote_node_removed(const char *host, const char *user_name) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", + host); + rc = pcmk__attrd_api_purge(attrd_api, host); + } + if (rc != pcmk_rc_ok) { + crm_err("Could not purge Pacemaker Remote node %s " + "in attribute manager%s: %s " CRM_XS " rc=%d", + host, when(), pcmk_rc_str(rc), rc); + } +} + +void +update_attrd_clear_failures(const char *host, const char *rsc, const char *op, + const char *interval_spec, gboolean is_remote_node) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + const char *op_desc = pcmk__s(op, "operations"); + const char *interval_desc = "all"; + uint32_t attrd_opts = pcmk__node_attr_none; + + if (op != NULL) { + interval_desc = pcmk__s(interval_spec, "nonrecurring"); + } + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } + crm_info("Asking attribute manager to clear failure of %s %s for %s " + "on %s node %s", interval_desc, op_desc, rsc, + node_type(is_remote_node), host); + rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op, + interval_spec, NULL, attrd_opts); + } + if (rc != pcmk_rc_ok) { + crm_err("Could not clear failure attributes for %s on %s node %s%s: %s " + CRM_XS " rc=%d", pcmk__s(rsc, "all resources"), + node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); + } +} diff --git a/daemons/controld/controld_callbacks.c 
b/daemons/controld/controld_callbacks.c new file mode 100644 index 0000000..d578adc --- /dev/null +++ b/daemons/controld/controld_callbacks.c @@ -0,0 +1,367 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <string.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/cluster.h> +#include <crm/cib.h> + +#include <pacemaker-controld.h> + +/* From join_dc... */ +extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +void +crmd_ha_msg_filter(xmlNode * msg) +{ + if (AM_I_DC) { + const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); + + if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) { + const char *from = crm_element_value(msg, F_ORIG); + + if (!pcmk__str_eq(from, controld_globals.our_nodename, + pcmk__str_casei)) { + int level = LOG_INFO; + const char *op = crm_element_value(msg, F_CRM_TASK); + + /* make sure the election happens NOW */ + if (controld_globals.fsa_state != S_ELECTION) { + ha_msg_input_t new_input; + + level = LOG_WARNING; + new_input.msg = msg; + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, + __func__); + } + + do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); + goto done; + } + } + + } else { + const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); + + if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) { + return; + } + } + + /* crm_log_xml_trace(msg, "HA[inbound]"); */ + route_message(C_HA_MESSAGE, msg); + + done: + controld_trigger_fsa(); +} + +/*! + * \internal + * \brief Check whether a node is online + * + * \param[in] node Node to check + * + * \retval -1 if completely dead + * \retval 0 if partially alive + * \retval 1 if completely alive + */ +static int +node_alive(const crm_node_t *node) +{ + if (pcmk_is_set(node->flags, crm_remote_node)) { + // Pacemaker Remote nodes can't be partially alive + return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1; + + } else if (crm_is_peer_active(node)) { + // Completely up cluster node: both cluster member and peer + return 1; + + } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc()) + && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { + // Completely down cluster node: neither cluster member nor peer + return -1; + } + + // Partially up cluster node: only cluster member or only peer + return 0; +} + +#define state_text(state) ((state)? (const char *)(state) : "in unknown state") + +void +peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) +{ + uint32_t old = 0; + bool appeared = FALSE; + bool is_remote = pcmk_is_set(node->flags, crm_remote_node); + + /* The controller waits to receive some information from the membership + * layer before declaring itself operational. If this is being called for a + * cluster node, indicate that we have it. 
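+ * (Remote nodes are not members of the cluster layer, so events for + * them do not set R_PEER_DATA below.)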
+ */ + if (!is_remote) { + controld_set_fsa_input_flags(R_PEER_DATA); + } + + if (type == crm_status_processes + && pcmk_is_set(node->processes, crm_get_cluster_proc()) + && !AM_I_DC + && !is_remote) { + /* + * This is a hack until we can send to a nodeid and/or we fix node name lookups + * These messages are ignored in crmd_ha_msg_filter() + */ + xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + crm_debug("Sending hello to node %u so that it learns our node name", node->id); + send_cluster_message(node, crm_msg_crmd, query, FALSE); + + free_xml(query); + } + + if (node->uname == NULL) { + return; + } + + switch (type) { + case crm_status_uname: + /* If we've never seen the node, then it also won't be in the status section */ + crm_info("%s node %s is now %s", + (is_remote? "Remote" : "Cluster"), + node->uname, state_text(node->state)); + return; + + case crm_status_nstate: + /* This callback should not be called unless the state actually + * changed, but here's a failsafe just in case. + */ + CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei), + return); + + crm_info("%s node %s is now %s (was %s)", + (is_remote? "Remote" : "Cluster"), + node->uname, state_text(node->state), state_text(data)); + + if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) { + appeared = TRUE; + if (!is_remote) { + remove_stonith_cleanup(node->uname); + } + } else { + controld_remove_failed_sync_node(node->uname); + controld_remove_voter(node->uname); + } + + crmd_alert_node_event(node); + break; + + case crm_status_processes: + CRM_CHECK(data != NULL, return); + old = *(const uint32_t *)data; + appeared = pcmk_is_set(node->processes, crm_get_cluster_proc()); + + { + const char *dc_s = controld_globals.dc_name; + + if ((dc_s == NULL) && AM_I_DC) { + dc_s = "true"; + } + + crm_info("Node %s is %s a peer " CRM_XS + " DC=%s old=%#07x new=%#07x", + node->uname, (appeared? "now" : "no longer"), + pcmk__s(dc_s, "<none>"), old, node->processes); + } + + if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) { + /* Peer status did not change. This should not be possible, + * since we don't track process flags other than peer status. + */ + crm_trace("Process flag %#7x did not change from %#7x to %#7x", + crm_get_cluster_proc(), old, node->processes); + return; + + } + + if (!appeared) { + node->peer_lost = time(NULL); + controld_remove_failed_sync_node(node->uname); + controld_remove_voter(node->uname); + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, + R_CIB_CONNECTED)) { + crm_trace("Ignoring peer status change because not connected to CIB"); + return; + + } else if (controld_globals.fsa_state == S_STOPPING) { + crm_trace("Ignoring peer status change because stopping"); + return; + } + + if (!appeared + && pcmk__str_eq(node->uname, controld_globals.our_nodename, + pcmk__str_casei)) { + /* Did we get evicted? */ + crm_notice("Our peer connection failed"); + register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); + + } else if (pcmk__str_eq(node->uname, controld_globals.dc_name, + pcmk__str_casei) + && !crm_is_peer_active(node)) { + /* Did the DC leave us? */ + crm_notice("Our peer on the DC (%s) is dead", + controld_globals.dc_name); + register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); + + /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't + * want to fence it. Newer DCs will send their shutdown request + * to all peers, who will update the DC's expected state to + * down, thus avoiding fencing. 
We can safely erase the DC's + * transient attributes when it leaves in that case. However, + * the only way to avoid fencing older DCs is to leave the + * transient attributes intact until it rejoins. + */ + if (compare_version(controld_globals.dc_version, "3.0.9") > 0) { + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); + } + + } else if (AM_I_DC + || pcmk_is_set(controld_globals.flags, controld_dc_left) + || (controld_globals.dc_name == NULL)) { + /* This only needs to be done once, so normally the DC should do + * it. However if there is no DC, every node must do it, since + * there is no other way to ensure some one node does it. + */ + if (appeared) { + te_trigger_stonith_history_sync(FALSE); + } else { + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); + } + } + break; + } + + if (AM_I_DC) { + xmlNode *update = NULL; + int flags = node_update_peer; + int alive = node_alive(node); + pcmk__graph_action_t *down = match_down_event(node->uuid); + + crm_trace("Alive=%d, appeared=%d, down=%d", + alive, appeared, (down? down->id : -1)); + + if (appeared && (alive > 0) && !is_remote) { + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + } + + if (down) { + const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); + + if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + + /* tengine_stonith_callback() confirms fence actions */ + crm_trace("Updating CIB %s fencer reported fencing of %s complete", + (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname); + + } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { + + // Shutdown actions are immediately confirmed (i.e. no_wait) + if (!is_remote) { + flags |= node_update_join | node_update_expected; + crmd_peer_down(node, FALSE); + check_join_state(controld_globals.fsa_state, __func__); + } + if (alive >= 0) { + crm_info("%s of peer %s is in progress " CRM_XS " action=%d", + task, node->uname, down->id); + } else { + crm_notice("%s of peer %s is complete " CRM_XS " action=%d", + task, node->uname, down->id); + pcmk__update_graph(controld_globals.transition_graph, down); + trigger_graph(); + } + + } else { + crm_trace("Node %s is %s, was expected to %s (op %d)", + node->uname, + ((alive > 0)? "alive" : + ((alive < 0)? "dead" : "partially alive")), + task, down->id); + } + + } else if (appeared == FALSE) { + if ((controld_globals.transition_graph == NULL) + || (controld_globals.transition_graph->id == -1)) { + crm_info("Stonith/shutdown of node %s is unknown to the " + "current DC", node->uname); + } else { + crm_warn("Stonith/shutdown of node %s was not expected", + node->uname); + } + if (!is_remote) { + crm_update_peer_join(__func__, node, crm_join_none); + check_join_state(controld_globals.fsa_state, __func__); + } + abort_transition(INFINITY, pcmk__graph_restart, "Node failure", + NULL); + fail_incompletable_actions(controld_globals.transition_graph, + node->uuid); + + } else { + crm_trace("Node %s came up, was not expected to be down", + node->uname); + } + + if (is_remote) { + /* A pacemaker_remote node won't have its cluster status updated + * in the CIB by membership-layer callbacks, so do it here. 
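+ * (Adding node_update_cluster to the flags makes the node_state + * update below include the node's membership status.)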
+ */ + flags |= node_update_cluster; + + /* Trigger resource placement on newly integrated nodes */ + if (appeared) { + abort_transition(INFINITY, pcmk__graph_restart, + "Pacemaker Remote node integrated", NULL); + } + } + + /* Update the CIB node state */ + update = create_node_state_update(node, flags, NULL, __func__); + if (update == NULL) { + crm_debug("Node state update not yet possible for %s", node->uname); + } else { + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); + } + free_xml(update); + } + + controld_trigger_fsa(); +} + +gboolean +crm_fsa_trigger(gpointer user_data) +{ + crm_trace("Invoked (queue len: %d)", + g_list_length(controld_globals.fsa_message_queue)); + s_crmd_fsa(C_FSA_INTERNAL); + crm_trace("Exited (queue len: %d)", + g_list_length(controld_globals.fsa_message_queue)); + return TRUE; +} diff --git a/daemons/controld/controld_callbacks.h b/daemons/controld/controld_callbacks.h new file mode 100644 index 0000000..a69d515 --- /dev/null +++ b/daemons/controld/controld_callbacks.h @@ -0,0 +1,21 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_CALLBACKS__H +# define CONTROLD_CALLBACKS__H + +#include <crm/cluster.h> + +extern void crmd_ha_msg_filter(xmlNode * msg); + +extern gboolean crm_fsa_trigger(gpointer user_data); + +extern void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data); + +#endif diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c new file mode 100644 index 0000000..94b99dd --- /dev/null +++ b/daemons/controld/controld_cib.c @@ -0,0 +1,1138 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <unistd.h> /* sleep */ + +#include <crm/common/alerts_internal.h> +#include <crm/common/xml.h> +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/lrmd_internal.h> + +#include <pacemaker-controld.h> + +// Call ID of the most recent in-progress CIB resource update (or 0 if none) +static int pending_rsc_update = 0; + +// Call IDs of requested CIB replacements that won't trigger a new election +// (used as a set of gint values) +static GHashTable *cib_replacements = NULL; + +/*! + * \internal + * \brief Store the call ID of a CIB replacement that the controller requested + * + * The \p do_cib_replaced() callback function will avoid triggering a new + * election when we're notified of one of these expected replacements. + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \note This function should be called after making any asynchronous CIB + * request (or before making any synchronous CIB request) that may replace + * part of the nodes or status section. This may include CIB sync calls. + */ +void +controld_record_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return); + + if (cib_replacements == NULL) { + cib_replacements = g_hash_table_new(NULL, NULL); + } + + /* If the call ID is already present in the table, then it's old. 
We may not + * be removing them properly, and we could improperly ignore replacement + * notifications if cib_t:call_id wraps around. + */ + CRM_LOG_ASSERT(g_hash_table_add(cib_replacements, + GINT_TO_POINTER((gint) call_id))); +} + +/*! + * \internal + * \brief Remove the call ID of a CIB replacement from the replacements table + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \return \p true if \p call_id was found in the table, or \p false otherwise + * + * \note CIB notifications run before CIB callbacks. If this function is called + * from within a callback, \p do_cib_replaced() will have removed + * \p call_id from the table first if relevant changes triggered a + * notification. + */ +bool +controld_forget_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return false); + + if (cib_replacements == NULL) { + return false; + } + return g_hash_table_remove(cib_replacements, + GINT_TO_POINTER((gint) call_id)); +} + +/*! + * \internal + * \brief Empty the hash table containing call IDs of CIB replacement requests + */ +void +controld_forget_all_cib_replace_calls(void) +{ + if (cib_replacements != NULL) { + g_hash_table_remove_all(cib_replacements); + } +} + +/*! + * \internal + * \brief Free the hash table containing call IDs of CIB replacement requests + */ +void +controld_destroy_cib_replacements_table(void) +{ + if (cib_replacements != NULL) { + g_hash_table_destroy(cib_replacements); + cib_replacements = NULL; + } +} + +/*! + * \internal + * \brief Respond to a dropped CIB connection + * + * \param[in] user_data CIB connection that dropped + */ +static void +handle_cib_disconnect(gpointer user_data) +{ + CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); + + controld_trigger_fsa(); + controld_globals.cib_conn->state = cib_disconnected; + + if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + // @TODO This should trigger a reconnect, not a shutdown + crm_crit("Lost connection to the CIB manager, shutting down"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + } else { // Expected + crm_info("Connection to the CIB manager terminated"); + } +} + +static void +do_cib_updated(const char *event, xmlNode * msg) +{ + if (pcmk__alert_in_patchset(msg, TRUE)) { + controld_trigger_config(); + } +} + +static void +do_cib_replaced(const char *event, xmlNode * msg) +{ + int call_id = 0; + const char *client_id = crm_element_value(msg, F_CIB_CLIENTID); + uint32_t change_section = cib_change_section_nodes + |cib_change_section_status; + long long value = 0; + + crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC)); + if (!AM_I_DC) { + return; + } + + if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0) + && pcmk__str_eq(client_id, controld_globals.cib_client_id, + pcmk__str_none) + && controld_forget_cib_replace_call(call_id)) { + // We requested this replace op. No need to restart the join. 
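+ // (The call ID was recorded by controld_record_cib_replace_call() + // when the request was made, so this notification is expected.)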
+ return; + } + + if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0) + || (value < 0) || (value > UINT32_MAX)) { + + crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION); + } else { + change_section = (uint32_t) value; + } + + if (pcmk_any_flags_set(change_section, cib_change_section_nodes + |cib_change_section_status)) { + + /* start the join process again so we get everyone's LRM status */ + populate_cib_nodes(node_update_quick|node_update_all, __func__); + + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + } +} + +void +controld_disconnect_cib_manager(void) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + CRM_ASSERT(cib_conn != NULL); + + crm_info("Disconnecting from the CIB manager"); + + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, + do_cib_replaced); + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + do_cib_updated); + cib_free_callbacks(cib_conn); + + if (cib_conn->state != cib_disconnected) { + cib_conn->cmds->set_secondary(cib_conn, + cib_scope_local|cib_discard_reply); + cib_conn->cmds->signoff(cib_conn); + } + + crm_notice("Disconnected from the CIB manager"); +} + +/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ +void +do_cib_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + static int cib_retries = 0; + + cib_t *cib_conn = controld_globals.cib_conn; + + void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; + void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced; + void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; + + int rc = pcmk_ok; + + CRM_ASSERT(cib_conn != NULL); + + if (pcmk_is_set(action, A_CIB_STOP)) { + if ((cib_conn->state != cib_disconnected) + && (pending_rsc_update != 0)) { + + crm_info("Waiting for resource update %d to complete", + pending_rsc_update); + crmd_fsa_stall(FALSE); + return; + } + controld_disconnect_cib_manager(); + } + + if (!pcmk_is_set(action, A_CIB_START)) { + return; + } + + if (cur_state == S_STOPPING) { + crm_err("Ignoring request to connect to the CIB manager after " + "shutdown"); + return; + } + + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + + if (rc != pcmk_ok) { + // A short wait that usually avoids stalling the FSA + sleep(1); + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + } + + if (rc != pcmk_ok) { + crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); + + } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, + dnotify_fn) != pcmk_ok) { + crm_err("Could not set dnotify callback"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_REPLACE_NOTIFY, + replace_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (replace)"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_DIFF_NOTIFY, + update_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + + } else { + controld_set_fsa_input_flags(R_CIB_CONNECTED); + cib_retries = 0; + cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id, + NULL); + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + cib_retries++; + + if (cib_retries < 30) { + crm_warn("Couldn't complete CIB registration %d times... 
" + "pause and retry", cib_retries); + controld_start_wait_timer(); + crmd_fsa_stall(FALSE); + + } else { + crm_err("Could not complete CIB registration %d times... " + "hard error", cib_retries); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } + } +} + +#define MIN_CIB_OP_TIMEOUT (30) + +/*! + * \internal + * \brief Get the timeout (in seconds) that should be used with CIB operations + * + * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout + * environment variable, or 10 seconds times one more than the number of + * nodes in the cluster. + */ +unsigned int +cib_op_timeout(void) +{ + static int env_timeout = -1; + unsigned int calculated_timeout = 0; + + if (env_timeout == -1) { + const char *env = getenv("PCMK_cib_timeout"); + + pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); + crm_trace("Minimum CIB op timeout: %ds (environment: %s)", + env_timeout, (env? env : "none")); + } + + calculated_timeout = 1 + crm_active_peers(); + if (crm_remote_peer_cache) { + calculated_timeout += g_hash_table_size(crm_remote_peer_cache); + } + calculated_timeout *= 10; + + calculated_timeout = QB_MAX(calculated_timeout, env_timeout); + crm_trace("Calculated timeout: %us", calculated_timeout); + + if (controld_globals.cib_conn) { + controld_globals.cib_conn->call_timeout = calculated_timeout; + } + return calculated_timeout; +} + +/*! + * \internal + * \brief Get CIB call options to use local scope if primary is unavailable + * + * \return CIB call options + */ +int +crmd_cib_smart_opt(void) +{ + int call_opt = cib_none; + + if ((controld_globals.fsa_state == S_ELECTION) + || (controld_globals.fsa_state == S_PENDING)) { + crm_info("Sending update to local CIB in state: %s", + fsa_state2string(controld_globals.fsa_state)); + cib__set_call_options(call_opt, "update", cib_scope_local); + } + return call_opt; +} + +static void +cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + char *desc = user_data; + + if (rc == 0) { + crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); + } else { + crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", + desc, call_id, pcmk_strerror(rc), rc); + } +} + +// Searches for various portions of node_state to delete + +// Match a particular node's node_state (takes node name 1x) +#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" + +// Node's lrm section (name 1x) +#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM + +/* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x) + */ +#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ + "|" XPATH_NODE_STATE \ + "//" XML_LRM_TAG_RESOURCE \ + "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \ + "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]" + +// Node's transient_attributes section (name 1x) +#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS + +// Everything under node_state (name 1x) +#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" + +/* Unlocked history + transient attributes + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x, + * name 1x) + */ +#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS + +/*! 
+ * \internal + * \brief Delete subsection of a node's CIB node_state + * + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to delete + * \param[in] options CIB call options to use + */ +void +controld_delete_node_state(const char *uname, enum controld_section_e section, + int options) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + char *xpath = NULL; + char *desc = NULL; + + // Shutdown locks that started before this time are expired + long long expire = (long long) time(NULL) + - controld_globals.shutdown_lock_limit; + + CRM_CHECK(uname != NULL, return); + switch (section) { + case controld_section_lrm: + xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc = crm_strdup_printf("resource history for node %s", uname); + break; + case controld_section_lrm_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, + uname, uname, expire); + desc = crm_strdup_printf("resource history (other than shutdown " + "locks) for node %s", uname); + break; + case controld_section_attrs: + xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc = crm_strdup_printf("transient attributes for node %s", uname); + break; + case controld_section_all: + xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc = crm_strdup_printf("all state for node %s", uname); + break; + case controld_section_all_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, + uname, uname, expire, uname); + desc = crm_strdup_printf("all state (other than shutdown locks) " + "for node %s", uname); + break; + } + + if (cib_conn == NULL) { + crm_warn("Unable to delete %s: no CIB connection", desc); + free(desc); + } else { + int call_id; + + cib__set_call_options(options, "node state deletion", + cib_xpath|cib_multiple); + call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options); + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, call_id, xpath); + fsa_register_cib_callback(call_id, desc, cib_delete_callback); + // CIB library handles freeing desc + } + free(xpath); +} + +// Takes node name and resource ID +#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ + "[@" XML_ATTR_UNAME "='%s']/" \ + XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE \ + "[@" XML_ATTR_ID "='%s']" +// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks + +/*! 
+ * \internal + * \brief Clear resource history from CIB for a given resource and node + * + * \param[in] rsc_id ID of resource to be cleared + * \param[in] node Node whose resource history should be cleared + * \param[in] user_name ACL user name to use + * \param[in] call_options CIB call options + * + * \return Standard Pacemaker return code + */ +int +controld_delete_resource_history(const char *rsc_id, const char *node, + const char *user_name, int call_options) +{ + char *desc = NULL; + char *xpath = NULL; + int rc = pcmk_rc_ok; + + CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); + + desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); + if (controld_globals.cib_conn == NULL) { + crm_err("Unable to clear %s: no CIB connection", desc); + free(desc); + return ENOTCONN; + } + + // Ask CIB to delete the entry + xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); + rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE, + NULL, xpath, NULL, NULL, call_options|cib_xpath, + user_name); + + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + crm_err("Could not delete resource status of %s on %s%s%s: %s " + CRM_XS " rc=%d", rsc_id, node, + (user_name? " for user " : ""), (user_name? user_name : ""), + pcmk_rc_str(rc), rc); + free(desc); + free(xpath); + return rc; + } + + if (pcmk_is_set(call_options, cib_sync_call)) { + if (pcmk_is_set(call_options, cib_dryrun)) { + crm_debug("Deletion of %s would succeed", desc); + } else { + crm_debug("Deletion of %s succeeded", desc); + } + free(desc); + + } else { + crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, rc, xpath); + fsa_register_cib_callback(rc, desc, cib_delete_callback); + // CIB library handles freeing desc + } + + free(xpath); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Build XML and string of parameters meeting some criteria, for digest + * + * \param[in] op Executor event with parameter table to use + * \param[in] metadata Parsed meta-data for executed resource agent + * \param[in] param_type Flag used for selection criteria + * \param[out] result Will be set to newly created XML with selected + * parameters as attributes + * + * \return Newly allocated space-separated string of parameter names + * \note Selection criteria vary by param_type: for the restart digest, we + * want parameters that are *not* marked reloadable (OCF 1.1) or that + * *are* marked unique (pre-1.1), for both string and XML results; for the + * secure digest, we want parameters that *are* marked private for the + * string, but parameters that are *not* marked private for the XML. + * \note It is the caller's responsibility to free the string return value with + * \p g_string_free() and the XML result with \p free_xml(). + */ +static GString * +build_parameter_list(const lrmd_event_data_t *op, + const struct ra_metadata_s *metadata, + enum ra_param_flags_e param_type, xmlNode **result) +{ + GString *list = NULL; + + *result = create_xml_node(NULL, XML_TAG_PARAMS); + + /* Consider all parameters except private ones, to be consistent with + * what the scheduler does with calculate_secure_digest().
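+ * (For those DC versions, the XML result is seeded with the full + * parameter table and filtered, before the loop below adjusts + * individual parameters.)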
+ */ + if (param_type == ra_param_private + && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { + g_hash_table_foreach(op->params, hash2field, *result); + pcmk__filter_op_for_digest(*result); + } + + for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { + struct ra_param_s *param = (struct ra_param_s *) iter->data; + + bool accept_for_list = false; + bool accept_for_xml = false; + + switch (param_type) { + case ra_param_reloadable: + accept_for_list = !pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = accept_for_list; + break; + + case ra_param_unique: + accept_for_list = pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = accept_for_list; + break; + + case ra_param_private: + accept_for_list = pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = !accept_for_list; + break; + } + + if (accept_for_list) { + crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); + + if (list == NULL) { + // We will later search for " WORD ", so start list with a space + pcmk__add_word(&list, 256, " "); + } + pcmk__add_word(&list, 0, param->rap_name); + + } else { + crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); + } + + if (accept_for_xml) { + const char *v = g_hash_table_lookup(op->params, param->rap_name); + + if (v != NULL) { + crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); + crm_xml_add(*result, param->rap_name, v); + } + + } else { + crm_trace("Removing attr %s from the xml result", param->rap_name); + xml_remove_prop(*result, param->rap_name); + } + } + + if (list != NULL) { + // We will later search for " WORD ", so end list with a space + pcmk__add_word(&list, 0, " "); + } + return list; +} + +static void +append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, + xmlNode *update, const char *version) +{ + GString *list = NULL; + char *digest = NULL; + xmlNode *restart = NULL; + + CRM_LOG_ASSERT(op->params != NULL); + + if (op->interval_ms > 0) { + /* monitors are not reloadable */ + return; + } + + if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { + // Add parameters not marked reloadable to the "op-force-restart" list + list = build_parameter_list(op, metadata, ra_param_reloadable, + &restart); + + } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { + /* @COMPAT pre-OCF-1.1 resource agents + * + * Before OCF 1.1, Pacemaker abused "unique=0" to indicate + * reloadability. Add any parameters with unique="1" to the + * "op-force-restart" list. + */ + list = build_parameter_list(op, metadata, ra_param_unique, &restart); + + } else { + // Resource does not support agent reloads + return; + } + + digest = calculate_operation_digest(restart, version); + /* Add "op-force-restart" and "op-restart-digest" to indicate that the resource + * supports reload, whether or not it actually has any parameters with + * unique="1". */ + crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, + (list == NULL)?
"" : (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); + + if ((list != NULL) && (list->len > 0)) { + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + } else { + crm_trace("%s: %s", op->rsc_id, digest); + } + + if (list != NULL) { + g_string_free(list, TRUE); + } + free_xml(restart); + free(digest); +} + +static void +append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, + xmlNode *update, const char *version) +{ + GString *list = NULL; + char *digest = NULL; + xmlNode *secure = NULL; + + CRM_LOG_ASSERT(op->params != NULL); + + /* + * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the + * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on + * the insecure ones + */ + list = build_parameter_list(op, metadata, ra_param_private, &secure); + + if (list != NULL) { + digest = calculate_operation_digest(secure, version); + crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); + + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + g_string_free(list, TRUE); + } else { + crm_trace("%s: no secure parameters", op->rsc_id); + } + + free_xml(secure); + free(digest); +} + +/*! + * \internal + * \brief Create XML for a resource history entry + * + * \param[in] func Function name of caller + * \param[in,out] parent XML to add entry to + * \param[in] rsc Affected resource + * \param[in,out] op Action to add an entry for (or NULL to do nothing) + * \param[in] node_name Node where action occurred + */ +void +controld_add_resource_history_xml_as(const char *func, xmlNode *parent, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, + const char *node_name) +{ + int target_rc = 0; + xmlNode *xml_op = NULL; + struct ra_metadata_s *metadata = NULL; + const char *caller_version = NULL; + lrm_state_t *lrm_state = NULL; + + if (op == NULL) { + return; + } + + target_rc = rsc_op_expected_rc(op); + + caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); + CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); + + xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, + controld_globals.our_nodename, func); + if (xml_op == NULL) { + return; + } + + if ((rsc == NULL) || (op->params == NULL) + || !crm_op_needs_metadata(rsc->standard, op->op_type)) { + + crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", + op->op_type, op->rsc_id, op->params, rsc); + return; + } + + lrm_state = lrm_state_find(node_name); + if (lrm_state == NULL) { + crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT + " because we have no connection to executor for %s", + op->rsc_id, op->op_type, op->interval_ms, node_name); + return; + } + + /* Ideally the metadata is cached, and the agent is just a fallback. + * + * @TODO Go through all callers and ensure they get metadata asynchronously + * first. + */ + metadata = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_agent + |controld_metadata_from_cache); + if (metadata == NULL) { + return; + } + + crm_trace("Including additional digests for %s:%s:%s", + rsc->standard, rsc->provider, rsc->type); + append_restart_list(op, metadata, xml_op, caller_version); + append_secure_list(op, metadata, xml_op, caller_version); + + return; +} + +/*! 
+ * \internal + * \brief Record an action as pending in the CIB, if appropriate + * + * \param[in] node_name Node where the action is pending + * \param[in] rsc Resource that action is for + * \param[in,out] op Pending action + * + * \return true if action was recorded in CIB, otherwise false + */ +bool +controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op) +{ + const char *record_pending = NULL; + + CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), + return false); + + // Never record certain operation types as pending + if ((op->op_type == NULL) || (op->params == NULL) + || !controld_action_is_recordable(op->op_type)) { + return false; + } + + // Check action's record-pending meta-attribute (defaults to true) + record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); + if ((record_pending != NULL) && !crm_is_true(record_pending)) { + return false; + } + + op->call_id = -1; + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + + lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); + + crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", + pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, + node_name); + controld_update_resource_history(node_name, rsc, op, 0); + return true; +} + +static void +cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + switch (rc) { + case pcmk_ok: + case -pcmk_err_diff_failed: + case -pcmk_err_diff_resync: + crm_trace("Resource update %d complete: rc=%d", call_id, rc); + break; + default: + crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); + } + + if (call_id == pending_rsc_update) { + pending_rsc_update = 0; + controld_trigger_fsa(); + } +} + +/* Only successful stops, and probes that found the resource inactive, get locks + * recorded in the history. This ensures the resource stays locked to the node + * until it is active there again after the node comes back up. + */ +static bool +should_preserve_lock(lrmd_event_data_t *op) +{ + if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + return false; + } + if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { + return true; + } + if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { + return true; + } + return false; +} + +/*! + * \internal + * \brief Request a CIB update + * + * \param[in] section Section of CIB to update + * \param[in,out] data New XML of CIB section to update + * \param[in] options CIB call options + * \param[in] callback If not NULL, set this as the operation callback + * + * \return Standard Pacemaker return code + * + * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is + * stored in \p pending_rsc_update on success. 
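+ * \note The caller retains ownership of \p data and is responsible for + * freeing it.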
+ */ +int +controld_update_cib(const char *section, xmlNode *data, int options, + void (*callback)(xmlNode *, int, int, xmlNode *, void *)) +{ + int cib_rc = -ENOTCONN; + + CRM_ASSERT(data != NULL); + + if (controld_globals.cib_conn != NULL) { + cib_rc = cib_internal_op(controld_globals.cib_conn, + PCMK__CIB_REQUEST_MODIFY, NULL, section, + data, NULL, options, NULL); + if (cib_rc >= 0) { + crm_debug("Submitted CIB update %d for %s section", + cib_rc, section); + } + } + + if (callback == NULL) { + if (cib_rc < 0) { + crm_err("Failed to update CIB %s section: %s", + section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); + } + + } else { + if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { + /* Checking for a particular callback is a little hacky, but it + * didn't seem worth adding an output argument for cib_rc for just + * one use case. + */ + pending_rsc_update = cib_rc; + } + fsa_register_cib_callback(cib_rc, NULL, callback); + } + + return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); +} + +/*! + * \internal + * \brief Update resource history entry in CIB + * + * \param[in] node_name Node where action occurred + * \param[in] rsc Resource that action is for + * \param[in,out] op Action to record + * \param[in] lock_time If nonzero, when resource was locked to node + * + * \note On success, the CIB update's call ID will be stored in + * pending_rsc_update. + */ +void +controld_update_resource_history(const char *node_name, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, time_t lock_time) +{ + xmlNode *update = NULL; + xmlNode *xml = NULL; + int call_opt = crmd_cib_smart_opt(); + const char *node_id = NULL; + const char *container = NULL; + + CRM_CHECK((node_name != NULL) && (op != NULL), return); + + if (rsc == NULL) { + crm_warn("Resource %s no longer exists in the executor", op->rsc_id); + controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); + return; + } + + // <status> + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + + // <node_state ...> + xml = create_xml_node(update, XML_CIB_TAG_STATE); + if (pcmk__str_eq(node_name, controld_globals.our_nodename, + pcmk__str_casei)) { + node_id = controld_globals.our_uuid; + } else { + node_id = node_name; + pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true); + } + crm_xml_add(xml, XML_ATTR_ID, node_id); + crm_xml_add(xml, XML_ATTR_UNAME, node_name); + crm_xml_add(xml, XML_ATTR_ORIGIN, __func__); + + // <lrm ...> + xml = create_xml_node(xml, XML_CIB_TAG_LRM); + crm_xml_add(xml, XML_ATTR_ID, node_id); + + // <lrm_resources> + xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES); + + // <lrm_resource ...> + xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE); + crm_xml_add(xml, XML_ATTR_ID, op->rsc_id); + crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard); + crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider); + crm_xml_add(xml, XML_ATTR_TYPE, rsc->type); + if (lock_time != 0) { + /* Actions on a locked resource should either preserve the lock by + * recording it with the action result, or clear it. 
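+ * (should_preserve_lock() limits preservation to successful stops and + * probes that found the resource inactive.)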
+ */ + if (!should_preserve_lock(op)) { + lock_time = 0; + } + crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + (long long) lock_time); + } + if (op->params != NULL) { + container = g_hash_table_lookup(op->params, + CRM_META "_" XML_RSC_ATTR_CONTAINER); + if (container != NULL) { + crm_trace("Resource %s is a part of container resource %s", + op->rsc_id, container); + crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container); + } + } + + // <lrm_resource_op ...> (possibly more than one) + controld_add_resource_history_xml(xml, rsc, op, node_name); + + /* Update CIB asynchronously. Even if it fails, the resource state should be + * discovered during the next election. Worst case, the node is wrongly + * fenced for running a resource it isn't. + */ + crm_log_xml_trace(update, __func__); + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback); + free_xml(update); +} + +/*! + * \internal + * \brief Erase an LRM history entry from the CIB, given the operation data + * + * \param[in] op Operation whose history should be deleted + */ +void +controld_delete_action_history(const lrmd_event_data_t *op) +{ + xmlNode *xml_top = NULL; + + CRM_CHECK(op != NULL, return); + + xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); + crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); + crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); + + if (op->interval_ms > 0) { + char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); + + /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ + crm_xml_add(xml_top, XML_ATTR_ID, op_id); + free(op_id); + } + + crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", + op->rsc_id, op->op_type, op->interval_ms, op->call_id); + + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, + XML_CIB_TAG_STATUS, xml_top, + cib_none); + + crm_log_xml_trace(xml_top, "op:cancel"); + free_xml(xml_top); +} + +/* Define xpath to find LRM resource history entry by node and resource */ +#define XPATH_HISTORY \ + "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ + "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ + "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ + "/" XML_LRM_TAG_RSC_OP + +/* ... and also by operation key */ +#define XPATH_HISTORY_ID XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s']" + +/* ... and also by operation key and operation call ID */ +#define XPATH_HISTORY_CALL XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" + +/* ... and also by operation key and original operation key */ +#define XPATH_HISTORY_ORIG XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" + +/*! 
+ * \internal + * \brief Delete a last_failure resource history entry from the CIB + * + * \param[in] rsc_id Name of resource to clear history for + * \param[in] node Name of node to clear history for + * \param[in] action If specified, delete only if this was failed action + * \param[in] interval_ms If \p action is specified, it has this interval + */ +void +controld_cib_delete_last_failure(const char *rsc_id, const char *node, + const char *action, guint interval_ms) +{ + char *xpath = NULL; + char *last_failure_key = NULL; + + CRM_CHECK((rsc_id != NULL) && (node != NULL), return); + + // Generate XPath to match desired entry + last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); + if (action == NULL) { + xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, + last_failure_key); + } else { + char *action_key = pcmk__op_key(rsc_id, action, interval_ms); + + xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, + last_failure_key, action_key); + free(action_key); + } + free(last_failure_key); + + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, + NULL, cib_xpath); + free(xpath); +} + +/*! + * \internal + * \brief Delete resource history entry from the CIB, given operation key + * + * \param[in] rsc_id Name of resource to clear history for + * \param[in] node Name of node to clear history for + * \param[in] key Operation key of operation to clear history for + * \param[in] call_id If specified, delete entry only if it has this call ID + */ +void +controld_delete_action_history_by_key(const char *rsc_id, const char *node, + const char *key, int call_id) +{ + char *xpath = NULL; + + CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); + + if (call_id > 0) { + xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, + call_id); + } else { + xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); + } + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, + NULL, cib_xpath); + free(xpath); +} diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h new file mode 100644 index 0000000..bd9492a --- /dev/null +++ b/daemons/controld/controld_cib.h @@ -0,0 +1,125 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef PCMK__CONTROLD_CIB__H +#define PCMK__CONTROLD_CIB__H + +#include <crm_internal.h> + +#include <glib.h> + +#include <crm/crm.h> +#include <crm/common/xml.h> +#include <crm/cib/internal.h> // PCMK__CIB_REQUEST_MODIFY +#include "controld_globals.h" // controld_globals.cib_conn + +static inline void +fsa_cib_anon_update(const char *section, xmlNode *data) { + if (controld_globals.cib_conn == NULL) { + crm_err("No CIB connection available"); + } else { + controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, + section, data, + cib_scope_local|cib_can_create); + } +} + +static inline void +fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) { + if (controld_globals.cib_conn == NULL) { + crm_err("No CIB connection available"); + } else { + controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, + section, data, + cib_scope_local + |cib_can_create + |cib_discard_reply); + } +} + +void controld_record_cib_replace_call(int call_id); +bool controld_forget_cib_replace_call(int call_id); +void controld_forget_all_cib_replace_calls(void); +void controld_destroy_cib_replacements_table(void); + +int controld_update_cib(const char *section, xmlNode *data, int options, + void (*callback)(xmlNode *, int, int, xmlNode *, + void *)); +unsigned int cib_op_timeout(void); + +// Subsections of node_state +enum controld_section_e { + controld_section_lrm, + controld_section_lrm_unlocked, + controld_section_attrs, + controld_section_all, + controld_section_all_unlocked +}; + +void controld_delete_node_state(const char *uname, + enum controld_section_e section, int options); +int controld_delete_resource_history(const char *rsc_id, const char *node, + const char *user_name, int call_options); + +/* Convenience macro for registering a CIB callback + * (assumes that data can be freed with free()) + */ +# define fsa_register_cib_callback(id, data, fn) do { \ + cib_t *cib_conn = controld_globals.cib_conn; \ + \ + CRM_ASSERT(cib_conn != NULL); \ + cib_conn->cmds->register_callback_full(cib_conn, id, cib_op_timeout(), \ + FALSE, data, #fn, fn, free); \ + } while(0) + +void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, + const char *node_name); + +#define controld_add_resource_history_xml(parent, rsc, op, node_name) \ + controld_add_resource_history_xml_as(__func__, (parent), (rsc), \ + (op), (node_name)) + +bool controld_record_pending_op(const char *node_name, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op); + +void controld_update_resource_history(const char *node_name, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, time_t lock_time); + +void controld_delete_action_history(const lrmd_event_data_t *op); + +void controld_cib_delete_last_failure(const char *rsc_id, const char *node, + const char *action, guint interval_ms); + +void controld_delete_action_history_by_key(const char *rsc_id, const char *node, + const char *key, int call_id); + +void controld_disconnect_cib_manager(void); + +int crmd_cib_smart_opt(void); + +/*! 
+ * \internal + * \brief Check whether an action type should be recorded in the CIB + * + * \param[in] action Action type + * + * \return true if action should be recorded, false otherwise + */ +static inline bool +controld_action_is_recordable(const char *action) +{ + return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE, + CRMD_ACTION_NOTIFY, CRMD_ACTION_METADATA, NULL); +} + +#endif // PCMK__CONTROLD_CIB__H diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c new file mode 100644 index 0000000..ffc62a0 --- /dev/null +++ b/daemons/controld/controld_control.c @@ -0,0 +1,857 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/pengine/rules.h> +#include <crm/cluster/internal.h> +#include <crm/cluster/election_internal.h> +#include <crm/common/ipc_internal.h> + +#include <pacemaker-controld.h> + +static qb_ipcs_service_t *ipcs = NULL; + +static crm_trigger_t *config_read_trigger = NULL; + +#if SUPPORT_COROSYNC +extern gboolean crm_connect_corosync(crm_cluster_t * cluster); +#endif + +void crm_shutdown(int nsig); +static gboolean crm_read_options(gpointer user_data); + +/* A_HA_CONNECT */ +void +do_ha_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + gboolean registered = FALSE; + static crm_cluster_t *cluster = NULL; + + if (cluster == NULL) { + cluster = pcmk_cluster_new(); + } + + if (action & A_HA_DISCONNECT) { + crm_cluster_disconnect(cluster); + crm_info("Disconnected from the cluster"); + + controld_set_fsa_input_flags(R_HA_DISCONNECTED); + } + + if (action & A_HA_CONNECT) { + crm_set_status_callback(&peer_update_callback); + crm_set_autoreap(FALSE); + +#if SUPPORT_COROSYNC + if (is_corosync_cluster()) { + registered = crm_connect_corosync(cluster); + } +#endif // SUPPORT_COROSYNC + + if (registered) { + controld_election_init(cluster->uname); + controld_globals.our_nodename = cluster->uname; + controld_globals.our_uuid = cluster->uuid; + if(cluster->uuid == NULL) { + crm_err("Could not obtain local uuid"); + registered = FALSE; + } + } + + if (!registered) { + controld_set_fsa_input_flags(R_HA_DISCONNECTED); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + return; + } + + populate_cib_nodes(node_update_none, __func__); + controld_clear_fsa_input_flags(R_HA_DISCONNECTED); + crm_info("Connected to the cluster"); + } + + if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { + crm_err("Unexpected action %s in %s", fsa_action2string(action), + __func__); + } +} + +/* A_SHUTDOWN */ +void +do_shutdown(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + /* just in case */ + controld_set_fsa_input_flags(R_SHUTDOWN); + controld_disconnect_fencer(FALSE); +} + +/* A_SHUTDOWN_REQ */ +void +do_shutdown_req(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *msg = NULL; + + controld_set_fsa_input_flags(R_SHUTDOWN); + //controld_set_fsa_input_flags(R_STAYDOWN); + 
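+    /* The request is broadcast (send_cluster_message() is called with a NULL
+     * node below) rather than addressed only to the DC, so delivery does not
+     * depend on this node's current view of who the DC is.
+     */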
crm_info("Sending shutdown request to all peers (DC is %s)", + pcmk__s(controld_globals.dc_name, "not set")); + msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } + free_xml(msg); +} + +void +crmd_fast_exit(crm_exit_t exit_code) +{ + if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { + crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", + exit_code, CRM_EX_FATAL); + exit_code = CRM_EX_FATAL; + + } else if ((exit_code == CRM_EX_OK) + && pcmk_is_set(controld_globals.fsa_input_register, + R_IN_RECOVERY)) { + crm_err("Could not recover from internal error"); + exit_code = CRM_EX_ERROR; + } + + if (controld_globals.logger_out != NULL) { + controld_globals.logger_out->finish(controld_globals.logger_out, + exit_code, true, NULL); + pcmk__output_free(controld_globals.logger_out); + controld_globals.logger_out = NULL; + } + + crm_exit(exit_code); +} + +crm_exit_t +crmd_exit(crm_exit_t exit_code) +{ + GMainLoop *mloop = controld_globals.mainloop; + + static bool in_progress = FALSE; + + if (in_progress && (exit_code == CRM_EX_OK)) { + crm_debug("Exit is already in progress"); + return exit_code; + + } else if(in_progress) { + crm_notice("Error during shutdown process, exiting now with status %d (%s)", + exit_code, crm_exit_str(exit_code)); + crm_write_blackbox(SIGTRAP, NULL); + crmd_fast_exit(exit_code); + } + + in_progress = TRUE; + crm_trace("Preparing to exit with status %d (%s)", + exit_code, crm_exit_str(exit_code)); + + /* Suppress secondary errors resulting from us disconnecting everything */ + controld_set_fsa_input_flags(R_HA_DISCONNECTED); + +/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ + + if(ipcs) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + ipcs = NULL; + } + + controld_close_attrd_ipc(); + controld_shutdown_schedulerd_ipc(); + controld_disconnect_fencer(TRUE); + + if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) { + crm_debug("No mainloop detected"); + exit_code = CRM_EX_ERROR; + } + + /* On an error, just get out. + * + * Otherwise, make the effort to have mainloop exit gracefully so + * that it (mostly) cleans up after itself and valgrind has less + * to report on - allowing real errors stand out + */ + if (exit_code != CRM_EX_OK) { + crm_notice("Forcing immediate exit with status %d (%s)", + exit_code, crm_exit_str(exit_code)); + crm_write_blackbox(SIGTRAP, NULL); + crmd_fast_exit(exit_code); + } + +/* Clean up as much memory as possible for valgrind */ + + for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; + iter = iter->next) { + fsa_data_t *fsa_data = (fsa_data_t *) iter->data; + + crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", + fsa_input2string(fsa_data->fsa_input), + fsa_state2string(controld_globals.fsa_state), + fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); + delete_fsa_input(fsa_data); + } + + controld_clear_fsa_input_flags(R_MEMBERSHIP); + + g_list_free(controld_globals.fsa_message_queue); + controld_globals.fsa_message_queue = NULL; + + controld_election_fini(); + + /* Tear down the CIB manager connection, but don't free it yet -- it could + * be used when we drain the mainloop later. 
+ */ + + controld_disconnect_cib_manager(); + + verify_stopped(controld_globals.fsa_state, LOG_WARNING); + controld_clear_fsa_input_flags(R_LRM_CONNECTED); + lrm_state_destroy_all(); + + mainloop_destroy_trigger(config_read_trigger); + config_read_trigger = NULL; + + controld_destroy_fsa_trigger(); + controld_destroy_transition_trigger(); + + pcmk__client_cleanup(); + crm_peer_destroy(); + + controld_free_fsa_timers(); + te_cleanup_stonith_history_sync(NULL, TRUE); + controld_free_sched_timer(); + + free(controld_globals.our_nodename); + controld_globals.our_nodename = NULL; + + free(controld_globals.our_uuid); + controld_globals.our_uuid = NULL; + + free(controld_globals.dc_name); + controld_globals.dc_name = NULL; + + free(controld_globals.dc_version); + controld_globals.dc_version = NULL; + + free(controld_globals.cluster_name); + controld_globals.cluster_name = NULL; + + free(controld_globals.te_uuid); + controld_globals.te_uuid = NULL; + + free_max_generation(); + controld_destroy_cib_replacements_table(); + controld_destroy_failed_sync_table(); + controld_destroy_outside_events_table(); + + mainloop_destroy_signal(SIGPIPE); + mainloop_destroy_signal(SIGUSR1); + mainloop_destroy_signal(SIGTERM); + mainloop_destroy_signal(SIGTRAP); + /* leave SIGCHLD engaged as we might still want to drain some service-actions */ + + if (mloop) { + GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop); + + /* Don't re-enter this block */ + controld_globals.mainloop = NULL; + + /* no signals on final draining anymore */ + mainloop_destroy_signal(SIGCHLD); + + crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); + + { + int lpc = 0; + + while((g_main_context_pending(ctx) && lpc < 10)) { + lpc++; + crm_trace("Iteration %d", lpc); + g_main_context_dispatch(ctx); + } + } + + crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); + g_main_loop_quit(mloop); + + /* Won't do anything yet, since we're inside it now */ + g_main_loop_unref(mloop); + } else { + mainloop_destroy_signal(SIGCHLD); + } + + cib_delete(controld_globals.cib_conn); + controld_globals.cib_conn = NULL; + + throttle_fini(); + + /* Graceful */ + crm_trace("Done preparing for exit with status %d (%s)", + exit_code, crm_exit_str(exit_code)); + return exit_code; +} + +/* A_EXIT_0, A_EXIT_1 */ +void +do_exit(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_exit_t exit_code = CRM_EX_OK; + int log_level = LOG_INFO; + const char *exit_type = "gracefully"; + + if (action & A_EXIT_1) { + log_level = LOG_ERR; + exit_type = "forcefully"; + exit_code = CRM_EX_ERROR; + } + + verify_stopped(cur_state, LOG_ERR); + do_crm_log(log_level, "Performing %s - %s exiting the controller", + fsa_action2string(action), exit_type); + + crm_info("[%s] stopped (%d)", crm_system_name, exit_code); + crmd_exit(exit_code); +} + +static void sigpipe_ignore(int nsig) { return; } + +/* A_STARTUP */ +void +do_startup(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_debug("Registering Signal Handlers"); + mainloop_add_signal(SIGTERM, crm_shutdown); + mainloop_add_signal(SIGPIPE, sigpipe_ignore); + + config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, + crm_read_options, NULL); + + controld_init_fsa_trigger(); + controld_init_transition_trigger(); + + crm_debug("Creating CIB 
manager and executor objects"); + controld_globals.cib_conn = cib_new(); + + lrm_state_init_local(); + if (controld_init_fsa_timers() == FALSE) { + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +// \return libqb error code (0 on success, -errno on error) +static int32_t +accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) +{ + crm_trace("Accepting new IPC client connection"); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return 0; +} + +// \return libqb error code (0 on success, -errno on error) +static int32_t +dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *client = pcmk__find_client(c); + + xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); + + if (msg == NULL) { + pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + } + pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE); + + CRM_ASSERT(client->user != NULL); + pcmk__update_acl_user(msg, F_CRM_USER, client->user); + + crm_xml_add(msg, F_CRM_SYS_FROM, client->id); + if (controld_authorize_ipc_message(msg, client, NULL)) { + crm_trace("Processing IPC message from client %s", + pcmk__client_name(client)); + route_message(C_IPC_MESSAGE, msg); + } + + controld_trigger_fsa(); + free_xml(msg); + return 0; +} + +static int32_t +ipc_client_disconnected(qb_ipcs_connection_t *c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client) { + crm_trace("Disconnecting %sregistered client %s (%p/%p)", + (client->userdata? "" : "un"), pcmk__client_name(client), + c, client); + free(client->userdata); + pcmk__free_client(client); + controld_trigger_fsa(); + } + return 0; +} + +static void +ipc_connection_destroyed(qb_ipcs_connection_t *c) +{ + crm_trace("Connection %p", c); + ipc_client_disconnected(c); +} + +/* A_STOP */ +void +do_stop(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); ipcs = NULL; + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); +} + +/* A_STARTED */ +void +do_started(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + static struct qb_ipcs_service_handlers crmd_callbacks = { + .connection_accept = accept_controller_client, + .connection_created = NULL, + .msg_process = dispatch_controller_ipc, + .connection_closed = ipc_client_disconnected, + .connection_destroyed = ipc_connection_destroyed + }; + + if (cur_state != S_STARTING) { + crm_err("Start cancelled... 
%s", fsa_state2string(cur_state)); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_MEMBERSHIP)) { + crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_LRM_CONNECTED)) { + crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_CIB_CONNECTED)) { + crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_READ_CONFIG)) { + crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { + + crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); + crmd_fsa_stall(TRUE); + return; + } + + crm_debug("Init server comms"); + ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); + if (ipcs == NULL) { + crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } else { + crm_notice("Pacemaker controller successfully started and accepting connections"); + } + controld_trigger_fencer_connect(); + + controld_clear_fsa_input_flags(R_STARTING); + register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); +} + +/* A_RECOVER */ +void +do_recover(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + controld_set_fsa_input_flags(R_IN_RECOVERY); + crm_warn("Fast-tracking shutdown in response to errors"); + + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); +} + +static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, + * short description, + * long description + */ + { + "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, + N_("Pacemaker version on cluster node elected Designated Controller (DC)"), + N_("Includes a hash which identifies the exact changeset the code was " + "built from. 
Used for diagnostic purposes.")
+    },
+    {
+        "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
+        N_("The messaging stack on which Pacemaker is currently running"),
+        N_("Used for informational and diagnostic purposes.")
+    },
+    {
+        "cluster-name", NULL, "string", NULL, NULL, NULL,
+        N_("An arbitrary name for the cluster"),
+        N_("This optional value is mostly for users' convenience as desired "
+           "in administration, but may also be used in Pacemaker "
+           "configuration rules via the #cluster-name node attribute, and "
+           "by higher-level tools and resource agents.")
+    },
+    {
+        XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
+        NULL, "20s", pcmk__valid_interval_spec,
+        N_("How long to wait for a response from other nodes during start-up"),
+        N_("The optimal value will depend on the speed and load of your network "
+           "and the type of switches used.")
+    },
+    {
+        XML_CONFIG_ATTR_RECHECK, NULL, "time",
+        N_("Zero disables polling, while positive values are an interval in seconds "
+           "(unless other units are specified, for example \"5min\")"),
+        "15min", pcmk__valid_interval_spec,
+        N_("Polling interval to recheck cluster state and evaluate rules "
+           "with date specifications"),
+        N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
+           "recheck cluster state for failure timeouts and most time-based "
+           "rules. However, it will also recheck the cluster after this "
+           "amount of inactivity, to evaluate rules with date specifications "
+           "and serve as a fail-safe for certain types of scheduler bugs.")
+    },
+    {
+        "load-threshold", NULL, "percentage", NULL,
+        "80%", pcmk__valid_percentage,
+        N_("Maximum amount of system load that should be used by cluster nodes"),
+        N_("The cluster will slow down its recovery process when the amount of "
+           "system resources used (currently CPU) approaches this limit"),
+    },
+    {
+        "node-action-limit", NULL, "integer", NULL,
+        "0", pcmk__valid_number,
+        N_("Maximum number of jobs that can be scheduled per node "
+           "(defaults to 2x cores)")
+    },
+    { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
+      N_("How a cluster node should react if notified of its own fencing"),
+      N_("A cluster node may receive notification of its own fencing if fencing "
+         "is misconfigured, or if fabric fencing is in use that doesn't cut "
+         "cluster communication. Allowed values are \"stop\" to attempt to "
+         "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
+         "to immediately reboot the local node, falling back to stop on failure.")
+    },
+    {
+        XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
+        "2min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("Declare an election failed if it is not decided within this much "
+           "time. If you need to adjust this value, it probably indicates "
+           "the presence of a bug.")
+    },
+    {
+        XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
+        "20min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("Exit immediately if shutdown does not complete within this much "
+           "time. 
If you need to adjust this value, it probably indicates " + "the presence of a bug.") + }, + { + "join-integration-timeout", "crmd-integration-timeout", "time", NULL, + "3min", pcmk__valid_interval_spec, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") + }, + { + "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, + "30min", pcmk__valid_interval_spec, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") + }, + { + "transition-delay", "crmd-transition-delay", "time", NULL, + "0s", pcmk__valid_interval_spec, + N_("*** Advanced Use Only *** Enabling this option will slow down " + "cluster recovery under all conditions"), + N_("Delay cluster recovery for this much time to allow for additional " + "events to occur. Useful if your configuration is sensitive to " + "the order in which ping updates arrive.") + }, + { + "stonith-watchdog-timeout", NULL, "time", NULL, + "0", controld_verify_stonith_watchdog_timeout, + N_("How long before nodes can be assumed to be safely down when " + "watchdog-based self-fencing via SBD is in use"), + N_("If this is set to a positive value, lost nodes are assumed to " + "self-fence using watchdog-based SBD within this much time. This " + "does not require a fencing resource to be explicitly configured, " + "though a fence_watchdog resource can be configured, to limit use " + "to specific nodes. If this is set to 0 (the default), the cluster " + "will never assume watchdog-based self-fencing. If this is set to a " + "negative value, the cluster will use twice the local value of the " + "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " + "or otherwise treat this as 0. WARNING: When used, this timeout " + "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " + "watchdog-based SBD, and Pacemaker will refuse to start on any of " + "those nodes where this is not true for the local value or SBD is " + "not active. When this is set to a negative value, " + "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " + "that use SBD, otherwise data corruption or loss could occur.") + }, + { + "stonith-max-attempts", NULL, "integer", NULL, + "10", pcmk__valid_positive_number, + N_("How many times fencing can fail before it will no longer be " + "immediately re-attempted on a target") + }, + + // Already documented in libpe_status (other values must be kept identical) + { + "no-quorum-policy", NULL, "select", + "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, + N_("What to do when the cluster does not have quorum"), NULL + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, + N_("Whether to lock resources to a cleanly shut down node"), + N_("When true, resources active on a node when it is cleanly shut down " + "are kept \"locked\" to that node (not allowed to run elsewhere) " + "until they start again on that node after it rejoins (or for at " + "most shutdown-lock-limit, if set). Stonith resources and " + "Pacemaker Remote connections are never locked. 
Clone and bundle " + "instances and the promoted role of promotable clones are " + "currently never locked, though support could be added in a future " + "release.") + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, + N_("Do not lock resources to a cleanly shut down node longer than " + "this"), + N_("If shutdown-lock is true and this is set to a nonzero time " + "duration, shutdown locks will expire after this much time has " + "passed since the shutdown was initiated, even if the node has not " + "rejoined.") + }, +}; + +void +crmd_metadata(void) +{ + const char *desc_short = "Pacemaker controller options"; + const char *desc_long = "Cluster options used by Pacemaker's controller"; + + gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short, + desc_long, controller_options, + PCMK__NELEM(controller_options)); + printf("%s", s); + g_free(s); +} + +static void +config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + const char *value = NULL; + GHashTable *config_hash = NULL; + crm_time_t *now = crm_time_new(NULL); + xmlNode *crmconfig = NULL; + xmlNode *alerts = NULL; + + if (rc != pcmk_ok) { + fsa_data_t *msg_data = NULL; + + crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + + if (rc == -EACCES || rc == -pcmk_err_schema_validation) { + crm_err("The cluster is mis-configured - shutting down and staying down"); + controld_set_fsa_input_flags(R_STAYDOWN); + } + goto bail; + } + + crmconfig = output; + if ((crmconfig) && + (crm_element_name(crmconfig)) && + (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { + crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); + } + if (!crmconfig) { + fsa_data_t *msg_data = NULL; + + crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + goto bail; + } + + crm_debug("Call %d : Parsing CIB options", call_id); + config_hash = pcmk__strkey_table(free, free); + pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, + config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); + + // Validate all options, and use defaults if not already present in hash + pcmk__validate_cluster_options(config_hash, controller_options, + PCMK__NELEM(controller_options)); + + value = g_hash_table_lookup(config_hash, "no-quorum-policy"); + if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { + controld_set_global_flags(controld_no_quorum_suicide); + } + + value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK); + if (crm_is_true(value)) { + controld_set_global_flags(controld_shutdown_lock_enabled); + } else { + controld_clear_global_flags(controld_shutdown_lock_enabled); + } + + value = g_hash_table_lookup(config_hash, + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); + controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value) + / 1000; + + value = g_hash_table_lookup(config_hash, "cluster-name"); + pcmk__str_update(&(controld_globals.cluster_name), value); + + // Let subcomponents initialize their own static variables + controld_configure_election(config_hash); + controld_configure_fencing(config_hash); + controld_configure_fsa_timers(config_hash); + controld_configure_throttle(config_hash); + + alerts = first_named_child(output, XML_CIB_TAG_ALERTS); + crmd_unpack_alerts(alerts); + + controld_set_fsa_input_flags(R_READ_CONFIG); + 
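+    /* Kick the FSA so that any input that stalled waiting for the
+     * configuration to be read (R_READ_CONFIG) can be processed now.
+     */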
controld_trigger_fsa(); + + g_hash_table_destroy(config_hash); + bail: + crm_time_free(now); +} + +/*! + * \internal + * \brief Trigger read and processing of the configuration + * + * \param[in] fn Calling function name + * \param[in] line Line number where call occurred + */ +void +controld_trigger_config_as(const char *fn, int line) +{ + if (config_read_trigger != NULL) { + crm_trace("%s:%d - Triggered config processing", fn, line); + mainloop_set_trigger(config_read_trigger); + } +} + +gboolean +crm_read_options(gpointer user_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + int call_id = cib_conn->cmds->query(cib_conn, + "//" XML_CIB_TAG_CRMCONFIG + " | //" XML_CIB_TAG_ALERTS, + NULL, cib_xpath|cib_scope_local); + + fsa_register_cib_callback(call_id, NULL, config_query_callback); + crm_trace("Querying the CIB... call %d", call_id); + return TRUE; +} + +/* A_READCONFIG */ +void +do_read_config(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + throttle_init(); + controld_trigger_config(); +} + +void +crm_shutdown(int nsig) +{ + const char *value = NULL; + guint default_period_ms = 0; + + if ((controld_globals.mainloop == NULL) + || !g_main_loop_is_running(controld_globals.mainloop)) { + crmd_exit(CRM_EX_OK); + return; + } + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_err("Escalating shutdown"); + register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); + return; + } + + controld_set_fsa_input_flags(R_SHUTDOWN); + register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); + + /* If shutdown timer doesn't have a period set, use the default + * + * @TODO: Evaluate whether this is still necessary. As long as + * config_query_callback() has been run at least once, it doesn't look like + * anything could have changed the timer period since then. + */ + value = pcmk__cluster_option(NULL, controller_options, + PCMK__NELEM(controller_options), + XML_CONFIG_ATTR_FORCE_QUIT); + default_period_ms = crm_parse_interval_spec(value); + controld_shutdown_start_countdown(default_period_ms); +} diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c new file mode 100644 index 0000000..4378b30 --- /dev/null +++ b/daemons/controld/controld_corosync.c @@ -0,0 +1,164 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <crm/crm.h> +#include <crm/cluster/internal.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +#if SUPPORT_COROSYNC + +extern void post_cache_update(int seq); + +/* A_HA_CONNECT */ + +static void +crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + uint32_t kind = 0; + const char *from = NULL; + char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); + + if(data == NULL) { + return; + } + if (kind == crm_class_cluster) { + crm_node_t *peer = NULL; + xmlNode *xml = string2xml(data); + + if (xml == NULL) { + crm_err("Could not parse message content (%d): %.100s", kind, data); + free(data); + return; + } + + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? 
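+         * (Apparently a leftover placeholder: no "wrapper" object exists in
+         * this function.)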
*/ + + peer = crm_get_peer(0, from); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership + */ + crm_warn("Receiving messages from a node we think is dead: %s[%d]", + peer->uname, peer->id); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, + ONLINESTATUS); + } + crmd_ha_msg_filter(xml); + free_xml(xml); + } else { + crm_err("Invalid message class (%d): %.100s", kind, data); + } + free(data); +} + +static gboolean +crmd_quorum_callback(unsigned long long seq, gboolean quorate) +{ + crm_update_quorum(quorate, FALSE); + post_cache_update(seq); + return TRUE; +} + +static void +crmd_cs_destroy(gpointer user_data) +{ + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { + crm_crit("Lost connection to cluster layer, shutting down"); + crmd_exit(CRM_EX_DISCONNECT); + + } else { + crm_info("Corosync connection closed"); + } +} + +/*! + * \brief Handle a Corosync notification of a CPG configuration change + * + * \param[in] handle CPG connection + * \param[in] cpg_name CPG group name + * \param[in] member_list List of current CPG members + * \param[in] member_list_entries Number of entries in \p member_list + * \param[in] left_list List of CPG members that left + * \param[in] left_list_entries Number of entries in \p left_list + * \param[in] joined_list List of CPG members that joined + * \param[in] joined_list_entries Number of entries in \p joined_list + */ +static void +cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, + const struct cpg_address *member_list, + size_t member_list_entries, + const struct cpg_address *left_list, + size_t left_list_entries, + const struct cpg_address *joined_list, + size_t joined_list_entries) +{ + /* When nodes leave CPG, the DC clears their transient node attributes. + * + * However if there is no DC, or the DC is among the nodes that left, each + * remaining node needs to do the clearing, to ensure it gets done. + * Otherwise, the attributes would persist when the nodes rejoin, which + * could have serious consequences for unfencing, agents that use attributes + * for internal logic, etc. + * + * Here, we set a global boolean if the DC is among the nodes that left, for + * use by the peer callback. 
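+     * (The flag is cleared again below, once pcmk_cpg_membership() has run
+     * the peer callback for the affected nodes.)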
+ */ + if (controld_globals.dc_name != NULL) { + crm_node_t *peer = NULL; + + peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name); + if (peer != NULL) { + for (int i = 0; i < left_list_entries; ++i) { + if (left_list[i].nodeid == peer->id) { + controld_set_global_flags(controld_dc_left); + break; + } + } + } + } + + // Process the change normally, which will call the peer callback as needed + pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); + + controld_clear_global_flags(controld_dc_left); +} + +extern gboolean crm_connect_corosync(crm_cluster_t * cluster); + +gboolean +crm_connect_corosync(crm_cluster_t * cluster) +{ + if (is_corosync_cluster()) { + crm_set_status_callback(&peer_update_callback); + cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; + cluster->cpg.cpg_confchg_fn = cpg_membership_callback; + cluster->destroy = crmd_cs_destroy; + + if (crm_cluster_connect(cluster)) { + pcmk__corosync_quorum_connect(crmd_quorum_callback, + crmd_cs_destroy); + return TRUE; + } + } + return FALSE; +} + +#endif diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c new file mode 100644 index 0000000..5f33d5b --- /dev/null +++ b/daemons/controld/controld_election.c @@ -0,0 +1,292 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/time.h> +#include <sys/resource.h> + +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/cluster/internal.h> +#include <crm/cluster/election_internal.h> +#include <crm/crm.h> + +#include <pacemaker-controld.h> + +static election_t *fsa_election = NULL; + +static gboolean +election_win_cb(gpointer data) +{ + register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); + return FALSE; +} + +void +controld_election_init(const char *uname) +{ + fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb); +} + +/*! + * \internal + * \brief Configure election options based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_election(GHashTable *options) +{ + const char *value = NULL; + + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_ELECTION_FAIL); + election_timeout_set_period(fsa_election, crm_parse_interval_spec(value)); +} + +void +controld_remove_voter(const char *uname) +{ + election_remove(fsa_election, uname); + + if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) { + /* Clear any election dampening in effect. Otherwise, if the lost DC had + * just won, an immediate new election could fizzle out with no new DC. 
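+     * (Dampening normally suppresses another election for a short period
+     * after one completes.)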
+ */ + election_clear_dampening(fsa_election); + } +} + +void +controld_election_fini(void) +{ + election_fini(fsa_election); + fsa_election = NULL; +} + +void +controld_stop_current_election_timeout(void) +{ + election_timeout_stop(fsa_election); +} + +/* A_ELECTION_VOTE */ +void +do_election_vote(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + gboolean not_voting = FALSE; + + /* don't vote if we're in one of these states or wanting to shut down */ + switch (cur_state) { + case S_STARTING: + case S_RECOVERY: + case S_STOPPING: + case S_TERMINATE: + crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); + not_voting = TRUE; + break; + case S_ELECTION: + case S_INTEGRATION: + case S_RELEASE_DC: + break; + default: + crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); + break; + } + + if (not_voting == FALSE) { + if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { + not_voting = TRUE; + } + } + + if (not_voting) { + if (AM_I_DC) { + register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); + + } else { + register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); + } + return; + } + + election_vote(fsa_election); + return; +} + +void +do_election_check(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + if (controld_globals.fsa_state == S_ELECTION) { + election_check(fsa_election); + } else { + crm_debug("Ignoring election check because we are not in an election"); + } +} + +/* A_ELECTION_COUNT */ +void +do_election_count_vote(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + enum election_result rc = 0; + ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); + + if(crm_peer_cache == NULL) { + if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_err("Internal error, no peer cache"); + } + return; + } + + rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING); + switch(rc) { + case election_start: + election_reset(fsa_election); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + break; + + case election_lost: + update_dc(NULL); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) { + cib_t *cib_conn = controld_globals.cib_conn; + + register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); + cib_conn->cmds->set_secondary(cib_conn, cib_scope_local); + + } else if (cur_state != S_STARTING) { + register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); + } + break; + + default: + crm_trace("Election message resulted in state %d", rc); + } +} + +static void +feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + if (rc != pcmk_ok) { + fsa_data_t *msg_data = NULL; + + crm_notice("Feature update failed: %s "CRM_XS" rc=%d", + pcmk_strerror(rc), rc); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +/*! 
+ * \internal + * \brief Update a node attribute in the CIB during a DC takeover + * + * \param[in] name Name of attribute to update + * \param[in] value New attribute value + */ +#define dc_takeover_update_attr(name, value) do { \ + cib__update_node_attr(controld_globals.logger_out, \ + controld_globals.cib_conn, cib_none, \ + XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \ + name, value, NULL, NULL); \ + } while (0) + +/* A_DC_TAKEOVER */ +void +do_dc_takeover(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *cib = NULL; + const char *cluster_type = name_for_cluster_type(get_cluster_type()); + pid_t watchdog = pcmk__locate_sbd(); + + crm_info("Taking over DC status for this partition"); + controld_set_fsa_input_flags(R_THE_DC); + execute_stonith_cleanup(); + + election_reset(fsa_election); + controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE); + + controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn, + cib_scope_local); + + cib = create_xml_node(NULL, XML_TAG_CIB); + crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback); + + dc_takeover_update_attr(XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); + dc_takeover_update_attr("dc-version", PACEMAKER_VERSION "-" BUILD_VERSION); + dc_takeover_update_attr("cluster-infrastructure", cluster_type); + +#if SUPPORT_COROSYNC + if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) { + char *cluster_name = pcmk__corosync_cluster_name(); + + if (cluster_name != NULL) { + dc_takeover_update_attr("cluster-name", cluster_name); + } + free(cluster_name); + } +#endif + + controld_trigger_config(); + free_xml(cib); +} + +/* A_DC_RELEASE */ +void +do_dc_release(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + if (action & A_DC_RELEASE) { + crm_debug("Releasing the role of DC"); + controld_clear_fsa_input_flags(R_THE_DC); + controld_expect_sched_reply(NULL); + + } else if (action & A_DC_RELEASED) { + crm_info("DC role released"); +#if 0 + if (are there errors) { + /* we can't stay up if not healthy */ + /* or perhaps I_ERROR and go to S_RECOVER? */ + result = I_SHUTDOWN; + } +#endif + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; + crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, + __func__); + /* Don't need a based response because controld will stop. */ + fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update); + free_xml(update); + } + register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); + + } else { + crm_err("Unknown DC action %s", fsa_action2string(action)); + } + + crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); + +} diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c new file mode 100644 index 0000000..0de399c --- /dev/null +++ b/daemons/controld/controld_execd.c @@ -0,0 +1,2433 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <regex.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <crm/crm.h> +#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc. +#include <crm/services.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/pengine/rules.h> +#include <crm/lrmd_internal.h> + +#include <pacemaker-internal.h> +#include <pacemaker-controld.h> + +#define START_DELAY_THRESHOLD 5 * 60 * 1000 +#define MAX_LRM_REG_FAILS 30 + +struct delete_event_s { + int rc; + const char *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); +static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); +static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); + +static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, + const xmlNode *rsc_op, + const char *rsc_id, + const char *operation); +static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + xmlNode *msg, struct ra_metadata_s *md); + +static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); + +static void +lrm_connection_destroy(void) +{ + if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { + crm_crit("Connection to executor failed"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + controld_clear_fsa_input_flags(R_LRM_CONNECTED); + + } else { + crm_info("Disconnected from executor"); + } + +} + +static char * +make_stop_id(const char *rsc, int call_id) +{ + return crm_strdup_printf("%s:%d", rsc, call_id); +} + +static void +copy_instance_keys(gpointer key, gpointer value, gpointer user_data) +{ + if (strstr(key, CRM_META "_") == NULL) { + g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); + } +} + +static void +copy_meta_keys(gpointer key, gpointer value, gpointer user_data) +{ + if (strstr(key, CRM_META "_") != NULL) { + g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); + } +} + +/*! + * \internal + * \brief Remove a recurring operation from a resource's history + * + * \param[in,out] history Resource history to modify + * \param[in] op Operation to remove + * + * \return TRUE if the operation was found and removed, FALSE otherwise + */ +static gboolean +history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) +{ + GList *iter; + + for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { + lrmd_event_data_t *existing = iter->data; + + if ((op->interval_ms == existing->interval_ms) + && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) + && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { + + history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); + lrmd_free_event(existing); + return TRUE; + } + } + return FALSE; +} + +/*! + * \internal + * \brief Free all recurring operations in resource history + * + * \param[in,out] history Resource history to modify + */ +static void +history_free_recurring_ops(rsc_history_t *history) +{ + GList *iter; + + for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { + lrmd_free_event(iter->data); + } + g_list_free(history->recurring_op_list); + history->recurring_op_list = NULL; +} + +/*! 
+ * \internal
+ * \brief Free resource history
+ *
+ * \param[in,out] data  Resource history (rsc_history_t *) to free
+ */
+void
+history_free(gpointer data)
+{
+    rsc_history_t *history = (rsc_history_t*)data;
+
+    if (history->stop_params) {
+        g_hash_table_destroy(history->stop_params);
+    }
+
+    /* Don't need to free history->rsc.id because it's set to history->id */
+    free(history->rsc.type);
+    free(history->rsc.standard);
+    free(history->rsc.provider);
+
+    lrmd_free_event(history->failed);
+    lrmd_free_event(history->last);
+    free(history->id);
+    history_free_recurring_ops(history);
+    free(history);
+}
+
+static void
+update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
+{
+    int target_rc = 0;
+    rsc_history_t *entry = NULL;
+
+    if (op->rsc_deleted) {
+        crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
+        controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
+                                         NULL, crmd_cib_smart_opt());
+        return;
+    }
+
+    if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) {
+        return;
+    }
+
+    crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
+
+    entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
+    if (entry == NULL && rsc) {
+        entry = calloc(1, sizeof(rsc_history_t));
+        entry->id = strdup(op->rsc_id);
+        g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
+
+        entry->rsc.id = entry->id;
+        entry->rsc.type = strdup(rsc->type);
+        entry->rsc.standard = strdup(rsc->standard);
+        pcmk__str_update(&entry->rsc.provider, rsc->provider);
+
+    } else if (entry == NULL) {
+        crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
+        return;
+    }
+
+    entry->last_callid = op->call_id;
+    target_rc = rsc_op_expected_rc(op);
+    if (op->op_status == PCMK_EXEC_CANCELLED) {
+        if (op->interval_ms > 0) {
+            crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
+                      op->rsc_id, op->op_type, op->interval_ms);
+            history_remove_recurring_op(entry, op);
+            return;
+        } else {
+            crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
+                      op->rsc_id, op->op_type, op->interval_ms, op->rc,
+                      op->op_status);
+        }
+
+    } else if (did_rsc_op_fail(op, target_rc)) {
+        /* Store failed monitors here, otherwise the block below will cause them
+         * to be forgotten when a stop happens.
+         */
+        if (entry->failed) {
+            lrmd_free_event(entry->failed);
+        }
+        entry->failed = lrmd_copy_event(op);
+
+    } else if (op->interval_ms == 0) {
+        if (entry->last) {
+            lrmd_free_event(entry->last);
+        }
+        entry->last = lrmd_copy_event(op);
+
+        if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START,
+                                               CRMD_ACTION_RELOAD,
+                                               CRMD_ACTION_RELOAD_AGENT,
+                                               CRMD_ACTION_STATUS, NULL)) {
+            if (entry->stop_params) {
+                g_hash_table_destroy(entry->stop_params);
+            }
+            entry->stop_params = pcmk__strkey_table(free, free);
+
+            g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
+        }
+    }
+
+    if (op->interval_ms > 0) {
+        /* Ensure there are no duplicates */
+        history_remove_recurring_op(entry, op);
+
+        crm_trace("Adding recurring op: " PCMK__OP_FMT,
+                  op->rsc_id, op->op_type, op->interval_ms);
+        entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
+
+    } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) {
+        crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
+                  g_list_length(entry->recurring_op_list), op->rsc_id,
+                  op->op_type, op->interval_ms);
+        history_free_recurring_ops(entry);
+    }
+}
+
+/*!
+ * \internal + * \brief Send a direct OK ack for a resource task + * + * \param[in] lrm_state LRM connection + * \param[in] input Input message being ack'ed + * \param[in] rsc_id ID of affected resource + * \param[in] rsc Affected resource (if available) + * \param[in] task Operation task being ack'ed + * \param[in] ack_host Name of host to send ack to + * \param[in] ack_sys IPC system name to ack + */ +static void +send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, + const char *rsc_id, const lrmd_rsc_info_t *rsc, + const char *task, const char *ack_host, const char *ack_sys) +{ + lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); + + lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); + lrmd_free_event(op); +} + +static inline const char * +op_node_name(lrmd_event_data_t *op) +{ + return pcmk__s(op->remote_nodename, controld_globals.our_nodename); +} + +void +lrm_op_callback(lrmd_event_data_t * op) +{ + CRM_CHECK(op != NULL, return); + switch (op->type) { + case lrmd_event_disconnect: + if (op->remote_nodename == NULL) { + /* If this is the local executor IPC connection, set the right + * bits in the controller when the connection goes down. + */ + lrm_connection_destroy(); + } + break; + + case lrmd_event_exec_complete: + { + lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); + + CRM_ASSERT(lrm_state != NULL); + process_lrm_event(lrm_state, op, NULL, NULL); + } + break; + + default: + break; + } +} + +static void +try_local_executor_connect(long long action, fsa_data_t *msg_data, + lrm_state_t *lrm_state) +{ + int rc = pcmk_rc_ok; + + crm_debug("Connecting to the local executor"); + + // If we can connect, great + rc = controld_connect_local_executor(lrm_state); + if (rc == pcmk_rc_ok) { + controld_set_fsa_input_flags(R_LRM_CONNECTED); + crm_info("Connection to the local executor established"); + return; + } + + // Otherwise, if we can try again, set a timer to do so + if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { + crm_warn("Failed to connect to the local executor %d time%s " + "(%d max): %s", lrm_state->num_lrm_register_fails, + pcmk__plural_s(lrm_state->num_lrm_register_fails), + MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); + controld_start_wait_timer(); + crmd_fsa_stall(FALSE); + return; + } + + // Otherwise give up + crm_err("Failed to connect to the executor the max allowed " + "%d time%s: %s", lrm_state->num_lrm_register_fails, + pcmk__plural_s(lrm_state->num_lrm_register_fails), + pcmk_rc_str(rc)); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); +} + +/* A_LRM_CONNECT */ +void +do_lrm_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + /* This only pertains to local executor connections. Remote connections are + * handled as resources within the scheduler. Connecting and disconnecting + * from remote executor instances is handled differently. 
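+     * (The remote connection logic lives in controld_remote_ra.c.)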
+ */ + + lrm_state_t *lrm_state = NULL; + + if (controld_globals.our_nodename == NULL) { + return; /* Nothing to do */ + } + lrm_state = lrm_state_find_or_create(controld_globals.our_nodename); + if (lrm_state == NULL) { + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + return; + } + + if (action & A_LRM_DISCONNECT) { + if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { + if (action == A_LRM_DISCONNECT) { + crmd_fsa_stall(FALSE); + return; + } + } + + controld_clear_fsa_input_flags(R_LRM_CONNECTED); + crm_info("Disconnecting from the executor"); + lrm_state_disconnect(lrm_state); + lrm_state_reset_tables(lrm_state, FALSE); + crm_notice("Disconnected from the executor"); + } + + if (action & A_LRM_CONNECT) { + try_local_executor_connect(action, msg_data, lrm_state); + } + + if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { + crm_err("Unexpected action %s in %s", fsa_action2string(action), + __func__); + } +} + +static gboolean +lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) +{ + int counter = 0; + gboolean rc = TRUE; + const char *when = "lrm disconnect"; + + GHashTableIter gIter; + const char *key = NULL; + rsc_history_t *entry = NULL; + active_op_t *pending = NULL; + + crm_debug("Checking for active resources before exit"); + + if (cur_state == S_TERMINATE) { + log_level = LOG_ERR; + when = "shutdown"; + + } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + when = "shutdown... waiting"; + } + + if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { + guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, + stop_recurring_actions, + lrm_state); + guint nremaining = g_hash_table_size(lrm_state->active_ops); + + if (removed || nremaining) { + crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", + removed, pcmk__plural_s(removed), when, nremaining); + } + } + + if (lrm_state->active_ops != NULL) { + g_hash_table_iter_init(&gIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { + /* Ignore recurring actions in the shutdown calculations */ + if (pending->interval_ms == 0) { + counter++; + } + } + } + + if (counter > 0) { + do_crm_log(log_level, "%d pending executor operation%s at %s", + counter, pcmk__plural_s(counter), when); + + if ((cur_state == S_TERMINATE) + || !pcmk_is_set(controld_globals.fsa_input_register, + R_SENT_RSC_STOP)) { + g_hash_table_iter_init(&gIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { + do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); + } + + } else { + rc = FALSE; + } + return rc; + } + + if (lrm_state->resource_history == NULL) { + return rc; + } + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + /* At this point we're not waiting, we're just shutting down */ + when = "shutdown"; + } + + counter = 0; + g_hash_table_iter_init(&gIter, lrm_state->resource_history); + while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { + if (is_rsc_active(lrm_state, entry->id) == FALSE) { + continue; + } + + counter++; + if (log_level == LOG_ERR) { + crm_info("Found %s active at %s", entry->id, when); + } else { + crm_trace("Found %s active at %s", entry->id, when); + } + if (lrm_state->active_ops != NULL) { + GHashTableIter hIter; + + g_hash_table_iter_init(&hIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { + if 
(pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { + crm_notice("%sction %s (%s) incomplete at %s", + pending->interval_ms == 0 ? "A" : "Recurring a", + key, pending->op_key, when); + } + } + } + } + + if (counter) { + crm_err("%d resource%s active at %s", + counter, (counter == 1)? " was" : "s were", when); + } + + return rc; +} + +static gboolean +is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) +{ + rsc_history_t *entry = NULL; + + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + if (entry == NULL || entry->last == NULL) { + return FALSE; + } + + crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, + entry->last->interval_ms, entry->last->rc); + if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) { + return FALSE; + + } else if (entry->last->rc == PCMK_OCF_OK + && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { + // A stricter check is too complex ... leave that to the scheduler + return FALSE; + + } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { + return FALSE; + + } else if ((entry->last->interval_ms == 0) + && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { + /* Badly configured resources can't be reliably stopped */ + return FALSE; + } + + return TRUE; +} + +static gboolean +build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) +{ + GHashTableIter iter; + rsc_history_t *entry = NULL; + + g_hash_table_iter_init(&iter, lrm_state->resource_history); + while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { + + GList *gIter = NULL; + xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); + + crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); + crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); + + if (entry->last && entry->last->params) { + const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); + if (container) { + crm_trace("Resource %s is a part of container resource %s", entry->id, container); + crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); + } + } + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, + lrm_state->node_name); + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, + lrm_state->node_name); + for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, + lrm_state->node_name); + } + } + + return FALSE; +} + +xmlNode * +controld_query_executor_state(void) +{ + xmlNode *xml_state = NULL; + xmlNode *xml_data = NULL; + xmlNode *rsc_list = NULL; + crm_node_t *peer = NULL; + lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename); + + if (!lrm_state) { + crm_err("Could not find executor state for node %s", + controld_globals.our_nodename); + return NULL; + } + + peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, + node_update_cluster|node_update_peer, + NULL, __func__); + if (xml_state == NULL) { + return NULL; + } + + xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); + crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); + rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); + + /* Build a list of active (not always running) resources */ + build_active_RAs(lrm_state, 
rsc_list); + + crm_log_xml_trace(xml_state, "Current executor state"); + + return xml_state; +} + +/*! + * \internal + * \brief Map standard Pacemaker return code to operation status and OCF code + * + * \param[out] event Executor event whose status and return code should be set + * \param[in] rc Standard Pacemaker return code + */ +void +controld_rc2event(lrmd_event_data_t *event, int rc) +{ + /* This is called for cleanup requests from controller peers/clients, not + * for resource actions, so no exit reason is needed. + */ + switch (rc) { + case pcmk_rc_ok: + lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + break; + case EACCES: + lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, + PCMK_EXEC_ERROR, NULL); + break; + default: + lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + NULL); + break; + } +} + +/*! + * \internal + * \brief Trigger a new transition after CIB status was deleted + * + * If a CIB status delete was not expected (as part of the transition graph), + * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" + * cluster property. + * + * \param[in] from_sys IPC name that requested the delete + * \param[in] rsc_id Resource whose status was deleted (for logging only) + */ +void +controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) +{ + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { + char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); + + crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_none, + XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, + "last-lrm-refresh", now_s, NULL, NULL); + free(now_s); + } +} + +static void +notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) +{ + lrmd_event_data_t *op = NULL; + const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); + const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); + + crm_info("Notifying %s on %s that %s was%s deleted", + from_sys, (from_host? from_host : "localhost"), rsc_id, + ((rc == pcmk_ok)? 
"" : " not")); + op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); + controld_rc2event(op, pcmk_legacy2rc(rc)); + controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); + lrmd_free_event(op); + controld_trigger_delete_refresh(from_sys, rsc_id); +} + +static gboolean +lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) +{ + struct delete_event_s *event = user_data; + struct pending_deletion_op_s *op = value; + + if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { + notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); + return TRUE; + } + return FALSE; +} + +static gboolean +lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) +{ + const char *rsc = user_data; + active_op_t *pending = value; + + if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { + crm_info("Removing op %s:%d for deleted resource %s", + pending->op_key, pending->call_id, rsc); + return TRUE; + } + return FALSE; +} + +static void +delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, + const char *rsc_id, GHashTableIter *rsc_iter, int rc, + const char *user_name, bool from_cib) +{ + struct delete_event_s event; + + CRM_CHECK(rsc_id != NULL, return); + + if (rc == pcmk_ok) { + char *rsc_id_copy = strdup(rsc_id); + + if (rsc_iter) { + g_hash_table_iter_remove(rsc_iter); + } else { + g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); + } + + if (from_cib) { + controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, + user_name, crmd_cib_smart_opt()); + } + g_hash_table_foreach_remove(lrm_state->active_ops, + lrm_remove_deleted_op, rsc_id_copy); + free(rsc_id_copy); + } + + if (input) { + notify_deleted(lrm_state, input, rsc_id, rc); + } + + event.rc = rc; + event.rsc = rsc_id; + event.lrm_state = lrm_state; + g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); +} + +static inline gboolean +last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) +{ + if (entry == NULL) { + return FALSE; + } + if (op == NULL) { + return TRUE; + } + return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) + && (interval_ms == entry->failed->interval_ms)); +} + +/*! + * \internal + * \brief Clear a resource's last failure + * + * Erase a resource's last failure on a particular node from both the + * LRM resource history in the CIB, and the resource history remembered + * for the LRM state. 
+ * + * \param[in] rsc_id Resource name + * \param[in] node_name Node name + * \param[in] operation If specified, only clear if matching this operation + * \param[in] interval_ms If operation is specified, it has this interval + */ +void +lrm_clear_last_failure(const char *rsc_id, const char *node_name, + const char *operation, guint interval_ms) +{ + lrm_state_t *lrm_state = lrm_state_find(node_name); + + if (lrm_state == NULL) { + return; + } + if (lrm_state->resource_history != NULL) { + rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, + rsc_id); + + if (last_failed_matches_op(entry, operation, interval_ms)) { + lrmd_free_event(entry->failed); + entry->failed = NULL; + } + } +} + +/* Returns: gboolean - cancellation is in progress */ +static gboolean +cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) +{ + int rc = pcmk_ok; + char *local_key = NULL; + active_op_t *pending = NULL; + + CRM_CHECK(op != 0, return FALSE); + CRM_CHECK(rsc_id != NULL, return FALSE); + if (key == NULL) { + local_key = make_stop_id(rsc_id, op); + key = local_key; + } + pending = g_hash_table_lookup(lrm_state->active_ops, key); + + if (pending) { + if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { + controld_set_active_op_flags(pending, active_op_remove); + crm_debug("Scheduling %s for removal", key); + } + + if (pcmk_is_set(pending->flags, active_op_cancelled)) { + crm_debug("Operation %s already cancelled", key); + free(local_key); + return FALSE; + } + controld_set_active_op_flags(pending, active_op_cancelled); + + } else { + crm_info("No pending op found for %s", key); + free(local_key); + return FALSE; + } + + crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); + rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, + pending->interval_ms); + if (rc == pcmk_ok) { + crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); + free(local_key); + return TRUE; + } + + crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); + /* The caller needs to make sure the entry is + * removed from the active operations list + * + * Usually by returning TRUE inside the worker function + * supplied to g_hash_table_foreach_remove() + * + * Not removing the entry from active operations will block + * the node from shutting down + */ + free(local_key); + return FALSE; +} + +struct cancel_data { + gboolean done; + gboolean remove; + const char *key; + lrmd_rsc_info_t *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean +cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + struct cancel_data *data = user_data; + active_op_t *op = value; + + if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { + data->done = TRUE; + remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); + } + return remove; +} + +static gboolean +cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) +{ + guint removed = 0; + struct cancel_data data; + + CRM_CHECK(rsc != NULL, return FALSE); + CRM_CHECK(key != NULL, return FALSE); + + data.key = key; + data.rsc = rsc; + data.done = FALSE; + data.remove = remove; + data.lrm_state = lrm_state; + + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + cancel_action_by_key, &data); + crm_trace("Removed %u op cache entries, new size: %u", + removed, g_hash_table_size(lrm_state->active_ops)); + return data.done; +} + +/*! 
+ * \internal
+ * \brief Retrieve resource information from the executor
+ *
+ * \param[in,out] lrm_state Executor connection state to use
+ * \param[in] rsc_xml XML containing resource configuration
+ * \param[in] do_create If true, register the resource if not already registered
+ * \param[out] rsc_info Where to store information obtained from executor
+ *
+ * \retval pcmk_ok Success (and rsc_info holds newly allocated result)
+ * \retval -EINVAL Required information is missing from arguments
+ * \retval -ENOTCONN No active executor connection
+ * \retval -ENODEV Resource not found
+ * \retval -errno Error communicating with executor when registering resource
+ *
+ * \note Caller is responsible for freeing result on success.
+ */
+static int
+get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
+ gboolean do_create, lrmd_rsc_info_t **rsc_info)
+{
+ const char *id = ID(rsc_xml);
+
+ CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
+ CRM_CHECK(id, return -EINVAL);
+
+ if (lrm_state_is_connected(lrm_state) == FALSE) {
+ return -ENOTCONN;
+ }
+
+ crm_trace("Retrieving resource information for %s from the executor", id);
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+
+ // If resource isn't known by ID, try clone name, if provided
+ if (!*rsc_info) {
+ const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
+
+ if (long_id) {
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
+ }
+ }
+
+ if ((*rsc_info == NULL) && do_create) {
+ const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
+ const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
+ const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
+ int rc;
+
+ crm_trace("Registering resource %s with the executor", id);
+ rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
+ lrmd_opt_drop_recurring);
+ if (rc != pcmk_ok) {
+ fsa_data_t *msg_data = NULL;
+
+ crm_err("Could not register resource %s with the executor on %s: %s "
+ CRM_XS " rc=%d",
+ id, lrm_state->node_name, pcmk_strerror(rc), rc);
+
+ /* Register this as an internal error if this involves the local
+ * executor. Otherwise, we're likely dealing with an unresponsive
+ * remote node, which is not an FSA failure.
+ */
+ if (lrm_state_is_local(lrm_state) == TRUE) {
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ return rc;
+ }
+
+ *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
+ }
+ return *rsc_info? pcmk_ok : -ENODEV;
+}
+
+static void
+delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
+ GHashTableIter *iter, const char *sys, const char *user,
+ ha_msg_input_t *request, bool unregister, bool from_cib)
+{
+ int rc = pcmk_ok;
+
+ crm_info("Removing resource %s from executor for %s%s%s",
+ id, sys, (user? " as " : ""), (user? user : ""));
+
+ if (rsc && unregister) {
+ rc = lrm_state_unregister_rsc(lrm_state, id, 0);
+ }
+
+ if (rc == pcmk_ok) {
+ crm_trace("Resource %s deleted from executor", id);
+ } else if (rc == -EINPROGRESS) {
+ crm_info("Deletion of resource '%s' from executor is pending", id);
+ if (request) {
+ struct pending_deletion_op_s *op = NULL;
+ char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
+
+ op = calloc(1, sizeof(struct pending_deletion_op_s));
+ op->rsc = strdup(rsc->id);
+ op->input = copy_ha_msg_input(request);
+ g_hash_table_insert(lrm_state->deletion_ops, ref, op);
+ }
+ return;
+ } else {
+ crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
+ CRM_XS " rc=%d", id, sys, (user? " as " : ""),
+ (user?
user : ""), pcmk_strerror(rc), rc); + } + + delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); +} + +static int +get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) +{ + int call_id = 999999999; + rsc_history_t *entry = NULL; + + if(lrm_state) { + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + } + + /* Make sure the call id is greater than the last successful operation, + * otherwise the failure will not result in a possible recovery of the resource + * as it could appear the failure occurred before the successful start */ + if (entry) { + call_id = entry->last_callid + 1; + } + + if (call_id < 0) { + call_id = 1; + } + return call_id; +} + +static void +fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, + enum ocf_exitcode op_exitcode, const char *exit_reason) +{ + op->call_id = get_fake_call_id(lrm_state, op->rsc_id); + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + lrmd__set_result(op, op_exitcode, op_status, exit_reason); +} + +static void +force_reprobe(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node, bool reprobe_all_nodes) +{ + GHashTableIter gIter; + rsc_history_t *entry = NULL; + + crm_info("Clearing resource history on node %s", lrm_state->node_name); + g_hash_table_iter_init(&gIter, lrm_state->resource_history); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + /* only unregister the resource during a reprobe if it is not a remote connection + * resource. otherwise unregistering the connection will terminate remote-node + * membership */ + bool unregister = true; + + if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { + unregister = false; + + if (reprobe_all_nodes) { + lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); + + if (remote_lrm_state != NULL) { + /* If reprobing all nodes, be sure to reprobe the remote + * node before clearing its connection resource + */ + force_reprobe(remote_lrm_state, from_sys, from_host, + user_name, TRUE, reprobe_all_nodes); + } + } + } + + /* Don't delete from the CIB, since we'll delete the whole node's LRM + * state from the CIB soon + */ + delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, + user_name, NULL, unregister, false); + } + + /* Now delete the copy in the CIB */ + controld_delete_node_state(lrm_state->node_name, controld_section_lrm, + cib_scope_local); + + // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) + update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); +} + +/*! + * \internal + * \brief Fail a requested action without actually executing it + * + * For an action that can't be executed, process it similarly to an actual + * execution result, with specified error status (except for notify actions, + * which will always be treated as successful). 
+ *
+ * \param[in,out] lrm_state Executor connection that action is for
+ * \param[in] action Action XML from request
+ * \param[in] op_status Desired operation status to use
+ * \param[in] rc Desired return code to use
+ * \param[in] exit_reason Human-friendly detail, if error
+ */
+static void
+synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
+ int op_status, enum ocf_exitcode rc,
+ const char *exit_reason)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
+ const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
+ xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
+
+ if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
+ /* @TODO Should we do something else, like direct ack? */
+ crm_info("Can't fake %s failure (%d) on %s without resource configuration",
+ crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
+ target_node);
+ return;
+
+ } else if (operation == NULL) {
+ /* This probably came from crm_resource -C, nothing to do */
+ crm_info("Can't fake %s failure (%d) on %s without operation",
+ ID(xml_rsc), rc, target_node);
+ return;
+ }
+
+ op = construct_op(lrm_state, action, ID(xml_rsc), operation);
+
+ if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail
+ fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
+ } else {
+ fake_op_status(lrm_state, op, op_status, rc, exit_reason);
+ }
+
+ crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
+ op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
+
+ // Process the result as if it came from the LRM
+ process_lrm_event(lrm_state, op, NULL, action);
+ lrmd_free_event(op);
+}
+
+/*!
+ * \internal
+ * \brief Get target of an LRM operation (replacing \p NULL with local node
+ * name)
+ *
+ * \param[in] xml LRM operation data XML
+ *
+ * \return LRM operation target node name (local node or Pacemaker Remote node)
+ */
+static const char *
+lrm_op_target(const xmlNode *xml)
+{
+ const char *target = NULL;
+
+ if (xml) {
+ target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
+ }
+ if (target == NULL) {
+ target = controld_globals.our_nodename;
+ }
+ return target;
+}
+
+static void
+fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
+ const char *from_host, const char *from_sys)
+{
+ lrmd_event_data_t *op = NULL;
+ lrmd_rsc_info_t *rsc = NULL;
+ xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
+
+ CRM_CHECK(xml_rsc != NULL, return);
+
+ /* The executor simply executes operations and reports the results, without
+ * any concept of success or failure, so to fail a resource, we must fake
+ * what a failure looks like.
+ *
+ * To do this, we create a fake executor operation event for the resource,
+ * and pass that event to the executor client callback so it will be
+ * processed as if it came from the executor.
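+ *
+ * In outline, the sequence implemented below is (an informal sketch of
+ * the real calls that follow):
+ *
+ *     op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
+ *     fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
+ *                    "Simulated failure");
+ *     process_lrm_event(lrm_state, op, NULL, xml);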
+ */ + op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); + + free((char*) op->user_data); + op->user_data = NULL; + op->interval_ms = 0; + + if (user_name && !pcmk__is_privileged(user_name)) { + crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); + fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, + PCMK_OCF_INSUFFICIENT_PRIV, + "Unprivileged user cannot fail resources"); + controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); + return; + } + + + if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { + crm_info("Failing resource %s...", rsc->id); + fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, + "Simulated failure"); + process_lrm_event(lrm_state, op, NULL, xml); + op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded + lrmd_free_rsc_info(rsc); + + } else { + crm_info("Cannot find/create resource in order to fail it..."); + crm_log_xml_warn(xml, "bad input"); + fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, + "Cannot fail unknown resource"); + } + + controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); +} + +static void +handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node, bool reprobe_all_nodes) +{ + crm_notice("Forcing the status of all resources to be redetected"); + force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, + reprobe_all_nodes); + + if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { + + xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, + from_sys, CRM_SYSTEM_LRMD, + controld_globals.our_uuid); + + crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + free_xml(reply); + } +} + +static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) +{ + char *op_key = NULL; + char *meta_key = NULL; + int call = 0; + const char *call_id = NULL; + const char *op_task = NULL; + guint interval_ms = 0; + gboolean in_progress = FALSE; + xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); + + CRM_CHECK(params != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_TASK); + op_task = crm_element_value(params, meta_key); + free(meta_key); + CRM_CHECK(op_task != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); + if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { + free(meta_key); + return FALSE; + } + free(meta_key); + + op_key = pcmk__op_key(rsc->id, op_task, interval_ms); + + meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); + call_id = crm_element_value(params, meta_key); + free(meta_key); + + crm_debug("Scheduler requested op %s (call=%s) be cancelled", + op_key, (call_id? 
call_id : "NA")); + pcmk__scan_min_int(call_id, &call, 0); + if (call == 0) { + // Normal case when the scheduler cancels a recurring op + in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); + + } else { + // Normal case when the scheduler cancels an orphan op + in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); + } + + // Acknowledge cancellation operation if for a remote connection resource + if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + char *op_id = make_stop_id(rsc->id, call); + + if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { + crm_info("Nothing known about operation %d for %s", call, op_key); + } + controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, + op_key, call); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + + /* needed at least for cancellation of a remote operation */ + if (lrm_state->active_ops != NULL) { + g_hash_table_remove(lrm_state->active_ops, op_id); + } + free(op_id); + + } else { + /* No ack is needed since abcdaa8, but peers with older versions + * in a rolling upgrade need one. We didn't bump the feature set + * at that commit, so we can only compare against the previous + * CRM version (3.0.8). If any peers have feature set 3.0.9 but + * not abcdaa8, they will time out waiting for the ack (no + * released versions of Pacemaker are affected). + */ + const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); + + if (compare_version(peer_version, "3.0.8") <= 0) { + crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", + op_key, from_host, peer_version); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + } + } + + free(op_key); + return TRUE; +} + +static void +do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, + bool crm_rsc_delete, const char *user_name) +{ + bool unregister = true; + int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, + user_name, + cib_dryrun|cib_sync_call); + + if (cib_rc != pcmk_rc_ok) { + lrmd_event_data_t *op = NULL; + + op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); + + /* These are resource clean-ups, not actions, so no exit reason is + * needed. + */ + lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); + controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); + lrmd_free_event(op); + return; + } + + if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + unregister = false; + } + + delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, + user_name, input, unregister, true); +} + +// User data for asynchronous metadata execution +struct metadata_cb_data { + lrmd_rsc_info_t *rsc; // Copy of resource information + xmlNode *input_xml; // Copy of FSA input XML +}; + +static struct metadata_cb_data * +new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) +{ + struct metadata_cb_data *data = NULL; + + data = calloc(1, sizeof(struct metadata_cb_data)); + CRM_ASSERT(data != NULL); + data->input_xml = copy_xml(input_xml); + data->rsc = lrmd_copy_rsc_info(rsc); + return data; +} + +static void +free_metadata_cb_data(struct metadata_cb_data *data) +{ + lrmd_free_rsc_info(data->rsc); + free_xml(data->input_xml); + free(data); +} + +/*! 
+ * \internal + * \brief Execute an action after metadata has been retrieved + * + * \param[in] pid Ignored + * \param[in] result Result of metadata action + * \param[in] user_data Metadata callback data + */ +static void +metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) +{ + struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; + + struct ra_metadata_s *md = NULL; + lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); + + if ((lrm_state != NULL) && pcmk__result_ok(result)) { + md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, + result->action_stdout); + } + do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); + free_metadata_cb_data(data); +} + +/* A_LRM_INVOKE */ +void +do_lrm_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + lrm_state_t *lrm_state = NULL; + const char *crm_op = NULL; + const char *from_sys = NULL; + const char *from_host = NULL; + const char *operation = NULL; + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *user_name = NULL; + const char *target_node = lrm_op_target(input->xml); + gboolean is_remote_node = FALSE; + bool crm_rsc_delete = FALSE; + + // Message routed to the local node is targeting a specific, non-local node + is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename, + pcmk__str_casei); + + lrm_state = lrm_state_find(target_node); + if ((lrm_state == NULL) && is_remote_node) { + crm_err("Failing action because local node has never had connection to remote node %s", + target_node); + synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, + "Local node has no connection to remote"); + return; + } + CRM_ASSERT(lrm_state != NULL); + + user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); + crm_op = crm_element_value(input->msg, F_CRM_TASK); + from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); + } + + if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + crm_rsc_delete = TRUE; // from crm_resource + } + operation = CRMD_ACTION_DELETE; + + } else if (input->xml != NULL) { + operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); + } + + CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); + + crm_trace("'%s' execution request from %s as %s user", + pcmk__s(crm_op, operation), + pcmk__s(from_sys, "unknown subsystem"), + pcmk__s(user_name, "current")); + + if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { + fail_lrm_resource(input->xml, lrm_state, user_name, from_host, + from_sys); + + } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) { + /* @COMPAT This can only be sent by crm_resource --refresh on a + * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely + * unlikely. It previously would cause the controller to re-write its + * resource history to the CIB. Just ignore it. 
+ */ + crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); + + // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op + } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) { + update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, + user_name, is_remote_node); + + } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) + || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { + const char *raw_target = NULL; + + if (input->xml != NULL) { + // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes + raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); + } + handle_reprobe_op(lrm_state, from_sys, from_host, user_name, + is_remote_node, (raw_target == NULL)); + + } else if (operation != NULL) { + lrmd_rsc_info_t *rsc = NULL; + xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); + gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE, + pcmk__str_none); + int rc; + + // We can't return anything meaningful without a resource ID + CRM_CHECK(xml_rsc && ID(xml_rsc), return); + + rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); + if (rc == -ENOTCONN) { + synthesize_lrmd_failure(lrm_state, input->xml, + PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, + "Not connected to remote executor"); + return; + + } else if ((rc < 0) && !create_rsc) { + /* Delete of malformed or nonexistent resource + * (deleting something that does not exist is a success) + */ + crm_notice("Not registering resource '%s' for a %s event " + CRM_XS " get-rc=%d (%s) transition-key=%s", + ID(xml_rsc), operation, + rc, pcmk_strerror(rc), ID(input->xml)); + delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, + user_name, true); + return; + + } else if (rc == -EINVAL) { + // Resource operation on malformed resource + crm_err("Invalid resource definition for %s", ID(xml_rsc)); + crm_log_xml_warn(input->msg, "invalid resource"); + synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, + PCMK_OCF_NOT_CONFIGURED, // fatal error + "Invalid resource definition"); + return; + + } else if (rc < 0) { + // Error communicating with the executor + crm_err("Could not register resource '%s' with executor: %s " + CRM_XS " rc=%d", + ID(xml_rsc), pcmk_strerror(rc), rc); + crm_log_xml_warn(input->msg, "failed registration"); + synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, + PCMK_OCF_INVALID_PARAM, // hard error + "Could not register resource with executor"); + return; + } + + if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) { + if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { + crm_log_xml_warn(input->xml, "Bad command"); + } + + } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) { + do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, + crm_rsc_delete, user_name); + + } else { + struct ra_metadata_s *md = NULL; + + /* Getting metadata from cache is OK except for start actions -- + * always refresh from the agent for those, in case the resource + * agent was updated. + * + * @TODO Only refresh metadata for starts if the agent actually + * changed (using something like inotify, or a hash or modification + * time of the agent executable). 
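+ *
+ * A hash-based check could be sketched with plain GLib along these lines
+ * (purely illustrative; "agent_path" and the cache comparison are
+ * hypothetical, and no such check exists in this patch):
+ *
+ *     gchar *data = NULL;
+ *     gsize len = 0;
+ *
+ *     if (g_file_get_contents(agent_path, &data, &len, NULL)) {
+ *         gchar *sum = g_compute_checksum_for_data(G_CHECKSUM_SHA256,
+ *                                                  (const guchar *) data,
+ *                                                  len);
+ *
+ *         // refresh cached metadata only if sum differs from last time
+ *         g_free(sum);
+ *         g_free(data);
+ *     }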
+ */
+ if (strcmp(operation, CRMD_ACTION_START) != 0) {
+ md = controld_get_rsc_metadata(lrm_state, rsc,
+ controld_metadata_from_cache);
+ }
+
+ if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
+ operation)) {
+ /* Most likely, we'll need the agent metadata to record the
+ * pending operation and the operation result. Get it now rather
+ * than wait until then, so the metadata action doesn't eat into
+ * the real action's timeout.
+ *
+ * @TODO Metadata is retrieved via direct execution of the
+ * agent, which has a couple of related issues: the executor
+ * should execute agents, not the controller; and metadata for
+ * Pacemaker Remote nodes should be collected on those nodes,
+ * not locally.
+ */
+ struct metadata_cb_data *data = NULL;
+
+ data = new_metadata_cb_data(rsc, input->xml);
+ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
+ rsc->id, rsc->standard,
+ ((rsc->provider == NULL)? "" : ":"),
+ ((rsc->provider == NULL)? "" : rsc->provider),
+ rsc->type);
+ (void) lrmd__metadata_async(rsc, metadata_complete,
+ (void *) data);
+ } else {
+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
+ }
+ }
+
+ lrmd_free_rsc_info(rsc);
+
+ } else {
+ crm_err("Invalid execution request: unknown command '%s' (bug?)",
+ crm_op);
+ register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+ }
+}
+
+static lrmd_event_data_t *
+construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
+ const char *rsc_id, const char *operation)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *op_delay = NULL;
+ const char *op_timeout = NULL;
+ GHashTable *params = NULL;
+
+ xmlNode *primitive = NULL;
+ const char *class = NULL;
+
+ const char *transition = NULL;
+
+ CRM_ASSERT(rsc_id && operation);
+
+ op = lrmd_new_event(rsc_id, operation, 0);
+ op->type = lrmd_event_exec_complete;
+ op->timeout = 0;
+ op->start_delay = 0;
+ lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+ if (rsc_op == NULL) {
+ CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei));
+ op->user_data = NULL;
+ /* This is the stop_all_resources() case: by definition there is no
+ * DC (or it would be shutting us down), so we should put our own
+ * version here.
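+ *
+ * In effect (informal notation), the op is created with a single
+ * parameter advertising our own feature set:
+ *
+ *     op->params[XML_ATTR_CRM_VERSION] = CRM_FEATURE_SET;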
+ */ + op->params = pcmk__strkey_table(free, free); + + g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); + + crm_trace("Constructed %s op for %s", operation, rsc_id); + return op; + } + + params = xml2list(rsc_op); + g_hash_table_remove(params, CRM_META "_op_target_rc"); + + op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); + pcmk__scan_min_int(op_delay, &op->start_delay, 0); + + op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); + pcmk__scan_min_int(op_timeout, &op->timeout, 0); + + if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, + &(op->interval_ms)) != pcmk_rc_ok) { + op->interval_ms = 0; + } + + /* Use pcmk_monitor_timeout instead of meta timeout for stonith + recurring monitor, if set */ + primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); + class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); + + if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) + && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei) + && (op->interval_ms > 0)) { + + op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); + if (op_timeout != NULL) { + op->timeout = crm_get_msec(op_timeout); + } + } + + if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) { + op->params = params; + + } else { + rsc_history_t *entry = NULL; + + if (lrm_state) { + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + } + + /* If we do not have stop parameters cached, use + * whatever we are given */ + if (!entry || !entry->stop_params) { + op->params = params; + } else { + /* Copy the cached parameter list so that we stop the resource + * with the old attributes, not the new ones */ + op->params = pcmk__strkey_table(free, free); + + g_hash_table_foreach(params, copy_meta_keys, op->params); + g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); + g_hash_table_destroy(params); + params = NULL; + } + } + + /* sanity */ + if (op->timeout <= 0) { + op->timeout = op->interval_ms; + } + if (op->start_delay < 0) { + op->start_delay = 0; + } + + transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); + CRM_CHECK(transition != NULL, return op); + + op->user_data = strdup(transition); + + if (op->interval_ms != 0) { + if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) { + crm_err("Start and Stop actions cannot have an interval: %u", + op->interval_ms); + op->interval_ms = 0; + } + } + + crm_trace("Constructed %s op for %s: interval=%u", + operation, rsc_id, op->interval_ms); + + return op; +} + +/*! + * \internal + * \brief Send a (synthesized) event result + * + * Reply with a synthesized event result directly, as opposed to going through + * the executor. 
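+ *
+ * A typical call from this file (compare notify_deleted() above) is:
+ *
+ *     controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+ *
+ * where a NULL \p to_sys is treated as the transition engine.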
+ * + * \param[in] to_host Host to send result to + * \param[in] to_sys IPC name to send result (NULL for transition engine) + * \param[in] rsc Type information about resource the result is for + * \param[in,out] op Event with result to send + * \param[in] rsc_id ID of resource the result is for + */ +void +controld_ack_event_directly(const char *to_host, const char *to_sys, + const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, + const char *rsc_id) +{ + xmlNode *reply = NULL; + xmlNode *update, *iter; + crm_node_t *peer = NULL; + + CRM_CHECK(op != NULL, return); + if (op->rsc_id == NULL) { + CRM_ASSERT(rsc_id != NULL); + op->rsc_id = strdup(rsc_id); + } + if (to_sys == NULL) { + to_sys = CRM_SYSTEM_TENGINE; + } + + peer = crm_get_peer(0, controld_globals.our_nodename); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + + iter = create_xml_node(update, XML_CIB_TAG_LRM); + crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); + + crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); + + controld_add_resource_history_xml(iter, rsc, op, + controld_globals.our_nodename); + reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); + + crm_log_xml_trace(update, "[direct ACK]"); + + crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", + op->rsc_id, op->op_type, op->interval_ms, op->user_data, + crm_element_value(reply, XML_ATTR_REFERENCE)); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + + free_xml(update); + free_xml(reply); +} + +gboolean +verify_stopped(enum crmd_fsa_state cur_state, int log_level) +{ + gboolean res = TRUE; + GList *lrm_state_list = lrm_state_get_list(); + GList *state_entry; + + for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { + lrm_state_t *lrm_state = state_entry->data; + + if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { + /* keep iterating through all even when false is returned */ + res = FALSE; + } + } + + controld_set_fsa_input_flags(R_SENT_RSC_STOP); + g_list_free(lrm_state_list); lrm_state_list = NULL; + return res; +} + +struct stop_recurring_action_s { + lrmd_rsc_info_t *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean +stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + struct stop_recurring_action_s *event = user_data; + active_op_t *op = value; + + if ((op->interval_ms != 0) + && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { + + crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); + remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); + } + + return remove; +} + +static gboolean +stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + lrm_state_t *lrm_state = user_data; + active_op_t *op = value; + + if (op->interval_ms != 0) { + crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, + (const char *) key); + remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); + } + + return remove; +} + +/*! 
+ * \internal + * \brief Check whether recurring actions should be cancelled before an action + * + * \param[in] rsc_id Resource that action is for + * \param[in] action Action being performed + * \param[in] interval_ms Operation interval of \p action (in milliseconds) + * + * \return true if recurring actions should be cancelled, otherwise false + */ +static bool +should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) +{ + if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) + && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) { + /* Don't stop monitoring a migrating Pacemaker Remote connection + * resource until the entire migration has completed. We must detect if + * the connection is unexpectedly severed, even during a migration. + */ + return false; + } + + // Cancel recurring actions before changing resource state + return (interval_ms == 0) + && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY, + NULL); +} + +/*! + * \internal + * \brief Check whether an action should not be performed at this time + * + * \param[in] operation Action to be performed + * + * \return Readable description of why action should not be performed, + * or NULL if it should be performed + */ +static const char * +should_nack_action(const char *action) +{ + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) + && pcmk__str_eq(action, RSC_START, pcmk__str_none)) { + + register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); + return "Not attempting start due to shutdown in progress"; + } + + switch (controld_globals.fsa_state) { + case S_NOT_DC: + case S_POLICY_ENGINE: // Recalculating + case S_TRANSITION_ENGINE: + break; + default: + if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) { + return "Controller cannot attempt actions at this time"; + } + break; + } + return NULL; +} + +static void +do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, + struct ra_metadata_s *md) +{ + int rc; + int call_id = 0; + char *op_id = NULL; + lrmd_event_data_t *op = NULL; + fsa_data_t *msg_data = NULL; + const char *transition = NULL; + const char *operation = NULL; + const char *nack_reason = NULL; + + CRM_CHECK((rsc != NULL) && (msg != NULL), return); + + operation = crm_element_value(msg, XML_LRM_ATTR_TASK); + CRM_CHECK(!pcmk__str_empty(operation), return); + + transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); + if (pcmk__str_empty(transition)) { + crm_log_xml_err(msg, "Missing transition number"); + } + + if (lrm_state == NULL) { + // This shouldn't be possible, but provide a failsafe just in case + crm_err("Cannot execute %s of %s: No executor connection " + CRM_XS " transition_key=%s", + operation, rsc->id, pcmk__s(transition, "")); + synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, + PCMK_OCF_UNKNOWN_ERROR, + "No executor connection"); + return; + } + + if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, + CRMD_ACTION_RELOAD_AGENT, NULL)) { + /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs + * will schedule reload-agent actions only. In either case, we need + * to map that to whatever the resource agent actually supports. + * Default to the OCF 1.1 name. 
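+ *
+ * That is, the mapping applied just below is:
+ *
+ *     agent advertises legacy reload support -> CRMD_ACTION_RELOAD
+ *     anything else (including no metadata)  -> CRMD_ACTION_RELOAD_AGENT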
+ */ + if ((md != NULL) + && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { + operation = CRMD_ACTION_RELOAD; + } else { + operation = CRMD_ACTION_RELOAD_AGENT; + } + } + + op = construct_op(lrm_state, msg, rsc->id, operation); + CRM_CHECK(op != NULL, return); + + if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { + guint removed = 0; + struct stop_recurring_action_s data; + + data.rsc = rsc; + data.lrm_state = lrm_state; + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + stop_recurring_action_by_rsc, + &data); + + if (removed) { + crm_debug("Stopped %u recurring operation%s in preparation for " + PCMK__OP_FMT, removed, pcmk__plural_s(removed), + rsc->id, operation, op->interval_ms); + } + } + + /* now do the op */ + crm_notice("Requesting local execution of %s operation for %s on %s " + CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, + crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, + pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); + + nack_reason = should_nack_action(operation); + if (nack_reason != NULL) { + crm_notice("Discarding attempt to perform action %s on %s in state %s " + "(shutdown=%s)", operation, rsc->id, + fsa_state2string(controld_globals.fsa_state), + pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN))); + + lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, + nack_reason); + controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + lrmd_free_event(op); + free(op_id); + return; + } + + controld_record_pending_op(lrm_state->node_name, rsc, op); + + op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); + + if (op->interval_ms > 0) { + /* cancel it so we can then restart it without conflict */ + cancel_op_key(lrm_state, rsc, op_id, FALSE); + } + + rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, + op->user_data, op->interval_ms, + op->timeout, op->start_delay, + op->params, &call_id); + if (rc == pcmk_rc_ok) { + /* record all operations so we can wait + * for them to complete during shutdown + */ + char *call_id_s = make_stop_id(rsc->id, call_id); + active_op_t *pending = NULL; + + pending = calloc(1, sizeof(active_op_t)); + crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); + + pending->call_id = call_id; + pending->interval_ms = op->interval_ms; + pending->op_type = strdup(operation); + pending->op_key = strdup(op_id); + pending->rsc_id = strdup(rsc->id); + pending->start_time = time(NULL); + pcmk__str_update(&pending->user_data, op->user_data); + if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &(pending->lock_time)) != pcmk_ok) { + pending->lock_time = 0; + } + g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); + + if ((op->interval_ms > 0) + && (op->start_delay > START_DELAY_THRESHOLD)) { + int target_rc = PCMK_OCF_OK; + + crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); + decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); + lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); + controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + } + + pending->params = op->params; + op->params = NULL; + + } else if (lrm_state_is_local(lrm_state)) { + crm_err("Could not initiate %s action for resource %s locally: %s " + CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); + fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); + process_lrm_event(lrm_state, op, NULL, 
NULL); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + + } else { + crm_err("Could not initiate %s action for resource %s remotely on %s: " + "%s " CRM_XS " rc=%d", + operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); + fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); + process_lrm_event(lrm_state, op, NULL, NULL); + } + + free(op_id); + lrmd_free_event(op); +} + +void +do_lrm_event(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) +{ + CRM_CHECK(FALSE, return); +} + +static char * +unescape_newlines(const char *string) +{ + char *pch = NULL; + char *ret = NULL; + static const char *escaped_newline = "\\n"; + + if (!string) { + return NULL; + } + + ret = strdup(string); + pch = strstr(ret, escaped_newline); + while (pch != NULL) { + /* Replace newline escape pattern with actual newline (and a space so we + * don't have to shuffle the rest of the buffer) + */ + pch[0] = '\n'; + pch[1] = ' '; + pch = strstr(pch, escaped_newline); + } + + return ret; +} + +static bool +did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, + const char * op_type, guint interval_ms) +{ + rsc_history_t *entry = NULL; + + CRM_CHECK(lrm_state != NULL, return FALSE); + CRM_CHECK(rsc_id != NULL, return FALSE); + CRM_CHECK(op_type != NULL, return FALSE); + + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + if (entry == NULL || entry->failed == NULL) { + return FALSE; + } + + if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) + && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) + && entry->failed->interval_ms == interval_ms) { + return TRUE; + } + + return FALSE; +} + +/*! + * \internal + * \brief Log the result of an executor action (actual or synthesized) + * + * \param[in] op Executor action to log result for + * \param[in] op_key Operation key for action + * \param[in] node_name Name of node action was performed on, if known + * \param[in] confirmed Whether to log that graph action was confirmed + */ +static void +log_executor_event(const lrmd_event_data_t *op, const char *op_key, + const char *node_name, gboolean confirmed) +{ + int log_level = LOG_ERR; + GString *str = g_string_sized_new(100); // reasonable starting size + + pcmk__g_strcat(str, + "Result of ", crm_action_str(op->op_type, op->interval_ms), + " operation for ", op->rsc_id, NULL); + + if (node_name != NULL) { + pcmk__g_strcat(str, " on ", node_name, NULL); + } + + switch (op->op_status) { + case PCMK_EXEC_DONE: + log_level = LOG_NOTICE; + pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL); + break; + + case PCMK_EXEC_TIMEOUT: + pcmk__g_strcat(str, + ": ", pcmk_exec_status_str(op->op_status), " after ", + pcmk__readable_interval(op->timeout), NULL); + break; + + case PCMK_EXEC_CANCELLED: + log_level = LOG_INFO; + /* order of __attribute__ and Fall through comment is IMPORTANT! + * do not change it without proper testing with both clang and gcc + * in multiple versions. + * the clang check allows to build with all versions of clang. + * the has_c_attribute check is to workaround a bug in clang version + * in rhel7. has_attribute would happily return "YES SIR WE GOT IT" + * and fail the build the next line. 
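+ *
+ * In other words, the guards below emit the attribute only when the
+ * compiler is clang, defines __has_c_attribute, and reports support for
+ * "fallthrough"; every other compiler relies on the plain "Fall through"
+ * comment instead.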
+ */ +#ifdef __clang__ +#ifdef __has_c_attribute +#if __has_attribute(fallthrough) + __attribute__((fallthrough)); +#endif +#endif +#endif + // Fall through + default: + pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), + NULL); + } + + if ((op->exit_reason != NULL) + && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { + + pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); + } + + g_string_append(str, " " CRM_XS); + g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", + (confirmed? "" : "un"), op->call_id, op_key); + if (op->op_status == PCMK_EXEC_DONE) { + g_string_append_printf(str, " rc=%d", op->rc); + } + + do_crm_log(log_level, "%s", str->str); + g_string_free(str, TRUE); + + /* The services library has already logged the output at info or debug + * level, so just raise to notice if it looks like a failure. + */ + if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { + char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", + op->rsc_id, op->op_type, + op->interval_ms, node_name); + + crm_log_output(LOG_NOTICE, prefix, op->output); + free(prefix); + } +} + +void +process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + active_op_t *pending, const xmlNode *action_xml) +{ + char *op_id = NULL; + char *op_key = NULL; + + gboolean remove = FALSE; + gboolean removed = FALSE; + bool need_direct_ack = FALSE; + lrmd_rsc_info_t *rsc = NULL; + const char *node_name = NULL; + + CRM_CHECK(op != NULL, return); + CRM_CHECK(op->rsc_id != NULL, return); + + // Remap new status codes for older DCs + if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { + switch (op->op_status) { + case PCMK_EXEC_NOT_CONNECTED: + lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, + PCMK_EXEC_ERROR, op->exit_reason); + break; + case PCMK_EXEC_INVALID: + lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, + op->exit_reason); + break; + default: + break; + } + } + + op_id = make_stop_id(op->rsc_id, op->call_id); + op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); + + // Get resource info if available (from executor state or action XML) + if (lrm_state) { + rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); + } + if ((rsc == NULL) && action_xml) { + xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); + + const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(xml, XML_ATTR_TYPE); + + if (standard && type) { + crm_info("%s agent information not cached, using %s%s%s:%s from action XML", + op->rsc_id, standard, + (provider? ":" : ""), (provider? 
provider : ""), type); + rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); + } else { + crm_err("Can't process %s result because %s agent information not cached or in XML", + op_key, op->rsc_id); + } + } + + // Get node name if available (from executor state or action XML) + if (lrm_state) { + node_name = lrm_state->node_name; + } else if (action_xml) { + node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); + } + + if(pending == NULL) { + remove = TRUE; + if (lrm_state) { + pending = g_hash_table_lookup(lrm_state->active_ops, op_id); + } + } + + if (op->op_status == PCMK_EXEC_ERROR) { + switch(op->rc) { + case PCMK_OCF_NOT_RUNNING: + case PCMK_OCF_RUNNING_PROMOTED: + case PCMK_OCF_DEGRADED: + case PCMK_OCF_DEGRADED_PROMOTED: + // Leave it to the TE/scheduler to decide if this is an error + op->op_status = PCMK_EXEC_DONE; + break; + default: + /* Nothing to do */ + break; + } + } + + if (op->op_status != PCMK_EXEC_CANCELLED) { + /* We might not record the result, so directly acknowledge it to the + * originator instead, so it doesn't time out waiting for the result + * (especially important if part of a transition). + */ + need_direct_ack = TRUE; + + if (controld_action_is_recordable(op->op_type)) { + if (node_name && rsc) { + // We should record the result, and happily, we can + time_t lock_time = (pending == NULL)? 0 : pending->lock_time; + + controld_update_resource_history(node_name, rsc, op, lock_time); + need_direct_ack = FALSE; + + } else if (op->rsc_deleted) { + /* We shouldn't record the result (likely the resource was + * refreshed, cleaned, or removed while this operation was + * in flight). + */ + crm_notice("Not recording %s result in CIB because " + "resource information was removed since it was initiated", + op_key); + } else { + /* This shouldn't be possible; the executor didn't consider the + * resource deleted, but we couldn't find resource or node + * information. + */ + crm_err("Unable to record %s result in CIB: %s", op_key, + (node_name? "No resource information" : "No node name")); + } + } + + } else if (op->interval_ms == 0) { + /* A non-recurring operation was cancelled. Most likely, the + * never-initiated action was removed from the executor's pending + * operations list upon resource removal. + */ + need_direct_ack = TRUE; + + } else if (pending == NULL) { + /* This recurring operation was cancelled, but was not pending. No + * transition actions are waiting on it, nothing needs to be done. + */ + + } else if (op->user_data == NULL) { + /* This recurring operation was cancelled and pending, but we don't + * have a transition key. This should never happen. + */ + crm_err("Recurring operation %s was cancelled without transition information", + op_key); + + } else if (pcmk_is_set(pending->flags, active_op_remove)) { + /* This recurring operation was cancelled (by us) and pending, and we + * have been waiting for it to finish. + */ + if (lrm_state) { + controld_delete_action_history(op); + } + + /* Directly acknowledge failed recurring actions here. The above call to + * controld_delete_action_history() will not erase any corresponding + * last_failure entry, which means that the DC won't confirm the + * cancellation via process_op_deletion(), and the transition would + * otherwise wait for the action timer to pop. 
+ */ + if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, + pending->op_type, pending->interval_ms)) { + need_direct_ack = TRUE; + } + + } else if (op->rsc_deleted) { + /* This recurring operation was cancelled (but not by us, and the + * executor does not have resource information, likely due to resource + * cleanup, refresh, or removal) and pending. + */ + crm_debug("Recurring op %s was cancelled due to resource deletion", + op_key); + need_direct_ack = TRUE; + + } else { + /* This recurring operation was cancelled (but not by us, likely by the + * executor before stopping the resource) and pending. We don't need to + * do anything special. + */ + } + + if (need_direct_ack) { + controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); + } + + if(remove == FALSE) { + /* The caller will do this afterwards, but keep the logging consistent */ + removed = TRUE; + + } else if (lrm_state && ((op->interval_ms == 0) + || (op->op_status == PCMK_EXEC_CANCELLED))) { + + gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); + + if (op->interval_ms != 0) { + removed = TRUE; + } else if (found) { + removed = TRUE; + crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", + op_key, op->call_id, op_id, + g_hash_table_size(lrm_state->active_ops)); + } + } + + log_executor_event(op, op_key, node_name, removed); + + if (lrm_state) { + if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) { + crmd_alert_resource_op(lrm_state->node_name, op); + } else if (rsc && (op->rc == PCMK_OCF_OK)) { + char *metadata = unescape_newlines(op->output); + + controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); + free(metadata); + } + } + + if (op->rsc_deleted) { + crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); + if (lrm_state) { + delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, + true); + } + } + + /* If a shutdown was escalated while operations were pending, + * then the FSA will be stalled right now... allow it to continue + */ + controld_trigger_fsa(); + if (lrm_state && rsc) { + update_history_cache(lrm_state, rsc, op); + } + + lrmd_free_rsc_info(rsc); + free(op_key); + free(op_id); +} diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c new file mode 100644 index 0000000..8c68bfc --- /dev/null +++ b/daemons/controld/controld_execd_state.c @@ -0,0 +1,814 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <errno.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/iso8601.h> +#include <crm/pengine/rules.h> +#include <crm/pengine/rules_internal.h> +#include <crm/lrmd_internal.h> + +#include <pacemaker-internal.h> +#include <pacemaker-controld.h> + +static GHashTable *lrm_state_table = NULL; +extern GHashTable *proxy_table; +int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); +void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); + +static void +free_rsc_info(gpointer value) +{ + lrmd_rsc_info_t *rsc_info = value; + + lrmd_free_rsc_info(rsc_info); +} + +static void +free_deletion_op(gpointer value) +{ + struct pending_deletion_op_s *op = value; + + free(op->rsc); + delete_ha_msg_input(op->input); + free(op); +} + +static void +free_recurring_op(gpointer value) +{ + active_op_t *op = value; + + free(op->user_data); + free(op->rsc_id); + free(op->op_type); + free(op->op_key); + if (op->params) { + g_hash_table_destroy(op->params); + } + free(op); +} + +static gboolean +fail_pending_op(gpointer key, gpointer value, gpointer user_data) +{ + lrmd_event_data_t event = { 0, }; + lrm_state_t *lrm_state = user_data; + active_op_t *op = value; + + crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)", + op->rsc_id, op->op_type, op->interval_ms, + lrm_state->node_name, (char*)key, op->user_data); + + event.type = lrmd_event_exec_complete; + event.rsc_id = op->rsc_id; + event.op_type = op->op_type; + event.user_data = op->user_data; + event.timeout = 0; + event.interval_ms = op->interval_ms; + lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, + "Action was pending when executor connection was dropped"); + event.t_run = (unsigned int) op->start_time; + event.t_rcchange = (unsigned int) op->start_time; + + event.call_id = op->call_id; + event.remote_nodename = lrm_state->node_name; + event.params = op->params; + + process_lrm_event(lrm_state, &event, op, NULL); + lrmd__reset_result(&event); + return TRUE; +} + +gboolean +lrm_state_is_local(lrm_state_t *lrm_state) +{ + return (lrm_state != NULL) + && pcmk__str_eq(lrm_state->node_name, controld_globals.our_nodename, + pcmk__str_casei); +} + +/*! 
+ * \internal
+ * \brief Create executor state entry for a node and add it to the state table
+ *
+ * \param[in]  node_name  Node to create entry for
+ *
+ * \return Newly allocated executor state object initialized for \p node_name
+ */
+static lrm_state_t *
+lrm_state_create(const char *node_name)
+{
+    lrm_state_t *state = NULL;
+
+    if (!node_name) {
+        crm_err("No node name given for lrm state object");
+        return NULL;
+    }
+
+    state = calloc(1, sizeof(lrm_state_t));
+    if (!state) {
+        return NULL;
+    }
+
+    state->node_name = strdup(node_name);
+    state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
+    state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
+    state->active_ops = pcmk__strkey_table(free, free_recurring_op);
+    state->resource_history = pcmk__strkey_table(NULL, history_free);
+    state->metadata_cache = metadata_cache_new();
+
+    g_hash_table_insert(lrm_state_table, (char *) state->node_name, state);
+    return state;
+}
+
+void
+lrm_state_destroy(const char *node_name)
+{
+    g_hash_table_remove(lrm_state_table, node_name);
+}
+
+static gboolean
+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
+{
+    remote_proxy_t *proxy = value;
+    const char *node_name = user_data;
+
+    if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+static remote_proxy_t *
+find_connected_proxy_by_node(const char * node_name)
+{
+    GHashTableIter gIter;
+    remote_proxy_t *proxy = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return NULL);
+
+    g_hash_table_iter_init(&gIter, proxy_table);
+
+    while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
+        if (proxy->source
+            && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+            return proxy;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+remote_proxy_disconnect_by_node(const char * node_name)
+{
+    remote_proxy_t *proxy = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return);
+
+    while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
+        /* mainloop_del_ipc_client() eventually calls
+         * remote_proxy_disconnected(), which removes the entry from
+         * proxy_table, so do not do this inside a g_hash_table_iter_next()
+         * loop.
+         */
+        if (proxy->source) {
+            mainloop_del_ipc_client(proxy->source);
+        }
+    }
+}
+
+static void
+internal_lrm_state_destroy(gpointer data)
+{
+    lrm_state_t *lrm_state = data;
+
+    if (!lrm_state) {
+        return;
+    }
+
+    /* Rather than directly removing the recorded proxy entries from
+     * proxy_table, make sure any connected proxies get disconnected, so that
+     * remote_proxy_disconnected() is called and removes the corresponding
+     * entries from proxy_table.
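+     *
+     * (For example, when the state entry for a Pacemaker Remote node is
+     * freed, any proxy sessions it was relaying are torn down here first,
+     * and their proxy_table entries go away with them.)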
+ */ + remote_proxy_disconnect_by_node(lrm_state->node_name); + + crm_trace("Destroying proxy table %s with %u members", + lrm_state->node_name, g_hash_table_size(proxy_table)); + // Just in case there's still any leftovers in proxy_table + g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); + remote_ra_cleanup(lrm_state); + lrmd_api_delete(lrm_state->conn); + + if (lrm_state->rsc_info_cache) { + crm_trace("Destroying rsc info cache with %u members", + g_hash_table_size(lrm_state->rsc_info_cache)); + g_hash_table_destroy(lrm_state->rsc_info_cache); + } + if (lrm_state->resource_history) { + crm_trace("Destroying history op cache with %u members", + g_hash_table_size(lrm_state->resource_history)); + g_hash_table_destroy(lrm_state->resource_history); + } + if (lrm_state->deletion_ops) { + crm_trace("Destroying deletion op cache with %u members", + g_hash_table_size(lrm_state->deletion_ops)); + g_hash_table_destroy(lrm_state->deletion_ops); + } + if (lrm_state->active_ops != NULL) { + crm_trace("Destroying pending op cache with %u members", + g_hash_table_size(lrm_state->active_ops)); + g_hash_table_destroy(lrm_state->active_ops); + } + metadata_cache_free(lrm_state->metadata_cache); + + free((char *)lrm_state->node_name); + free(lrm_state); +} + +void +lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) +{ + if (lrm_state->resource_history) { + crm_trace("Resetting resource history cache with %u members", + g_hash_table_size(lrm_state->resource_history)); + g_hash_table_remove_all(lrm_state->resource_history); + } + if (lrm_state->deletion_ops) { + crm_trace("Resetting deletion operations cache with %u members", + g_hash_table_size(lrm_state->deletion_ops)); + g_hash_table_remove_all(lrm_state->deletion_ops); + } + if (lrm_state->active_ops != NULL) { + crm_trace("Resetting active operations cache with %u members", + g_hash_table_size(lrm_state->active_ops)); + g_hash_table_remove_all(lrm_state->active_ops); + } + if (lrm_state->rsc_info_cache) { + crm_trace("Resetting resource information cache with %u members", + g_hash_table_size(lrm_state->rsc_info_cache)); + g_hash_table_remove_all(lrm_state->rsc_info_cache); + } + if (reset_metadata) { + metadata_cache_reset(lrm_state->metadata_cache); + } +} + +gboolean +lrm_state_init_local(void) +{ + if (lrm_state_table) { + return TRUE; + } + + lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy); + if (!lrm_state_table) { + return FALSE; + } + + proxy_table = pcmk__strikey_table(NULL, remote_proxy_free); + if (!proxy_table) { + g_hash_table_destroy(lrm_state_table); + lrm_state_table = NULL; + return FALSE; + } + + return TRUE; +} + +void +lrm_state_destroy_all(void) +{ + if (lrm_state_table) { + crm_trace("Destroying state table with %u members", + g_hash_table_size(lrm_state_table)); + g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; + } + if(proxy_table) { + crm_trace("Destroying proxy table with %u members", + g_hash_table_size(proxy_table)); + g_hash_table_destroy(proxy_table); proxy_table = NULL; + } +} + +lrm_state_t * +lrm_state_find(const char *node_name) +{ + if (!node_name) { + return NULL; + } + return g_hash_table_lookup(lrm_state_table, node_name); +} + +lrm_state_t * +lrm_state_find_or_create(const char *node_name) +{ + lrm_state_t *lrm_state; + + lrm_state = g_hash_table_lookup(lrm_state_table, node_name); + if (!lrm_state) { + lrm_state = lrm_state_create(node_name); + } + + return lrm_state; +} + +GList * 
+lrm_state_get_list(void) +{ + return g_hash_table_get_values(lrm_state_table); +} + +void +lrm_state_disconnect_only(lrm_state_t * lrm_state) +{ + int removed = 0; + + if (!lrm_state->conn) { + return; + } + crm_trace("Disconnecting %s", lrm_state->node_name); + + remote_proxy_disconnect_by_node(lrm_state->node_name); + + ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + fail_pending_op, lrm_state); + crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); + } +} + +void +lrm_state_disconnect(lrm_state_t * lrm_state) +{ + if (!lrm_state->conn) { + return; + } + + lrm_state_disconnect_only(lrm_state); + + lrmd_api_delete(lrm_state->conn); + lrm_state->conn = NULL; +} + +int +lrm_state_is_connected(lrm_state_t * lrm_state) +{ + if (!lrm_state->conn) { + return FALSE; + } + return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); +} + +int +lrm_state_poke_connection(lrm_state_t * lrm_state) +{ + + if (!lrm_state->conn) { + return -ENOTCONN; + } + return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); +} + +// \return Standard Pacemaker return code +int +controld_connect_local_executor(lrm_state_t *lrm_state) +{ + int rc = pcmk_rc_ok; + + if (lrm_state->conn == NULL) { + lrmd_t *api = NULL; + + rc = lrmd__new(&api, NULL, NULL, 0); + if (rc != pcmk_rc_ok) { + return rc; + } + api->cmds->set_callback(api, lrm_op_callback); + lrm_state->conn = api; + } + + rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, + CRM_SYSTEM_CRMD, NULL); + rc = pcmk_legacy2rc(rc); + + if (rc == pcmk_rc_ok) { + lrm_state->num_lrm_register_fails = 0; + } else { + lrm_state->num_lrm_register_fails++; + } + return rc; +} + +static remote_proxy_t * +crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) +{ + struct ipc_client_callbacks proxy_callbacks = { + .dispatch = remote_proxy_dispatch, + .destroy = remote_proxy_disconnected + }; + remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, + session_id, channel); + return proxy; +} + +gboolean +crmd_is_proxy_session(const char *session) +{ + return g_hash_table_lookup(proxy_table, session) ? 
TRUE : FALSE; +} + +void +crmd_proxy_send(const char *session, xmlNode *msg) +{ + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + lrm_state_t *lrm_state = NULL; + + if (!proxy) { + return; + } + crm_log_xml_trace(msg, "to-proxy"); + lrm_state = lrm_state_find(proxy->node_name); + if (lrm_state) { + crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); + remote_proxy_relay_event(proxy, msg); + } +} + +static void +crmd_proxy_dispatch(const char *session, xmlNode *msg) +{ + crm_trace("Processing proxied IPC message from session %s", session); + crm_log_xml_trace(msg, "controller[inbound]"); + crm_xml_add(msg, F_CRM_SYS_FROM, session); + if (controld_authorize_ipc_message(msg, NULL, session)) { + route_message(C_IPC_MESSAGE, msg); + } + controld_trigger_fsa(); +} + +static void +remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + if (rc != pcmk_ok) { + crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); + + if (rc == -EACCES || rc == -pcmk_err_schema_validation) { + crm_err("The cluster is mis-configured - shutting down and staying down"); + } + + } else { + lrmd_t * lrmd = (lrmd_t *)user_data; + crm_time_t *now = crm_time_new(NULL); + GHashTable *config_hash = pcmk__strkey_table(free, free); + + crm_debug("Call %d : Parsing CIB options", call_id); + + pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL, + config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); + + /* Now send it to the remote peer */ + lrmd__validate_remote_settings(lrmd, config_hash); + + g_hash_table_destroy(config_hash); + crm_time_free(now); + } +} + +static void +crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) +{ + lrm_state_t *lrm_state = userdata; + const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + const char *op = crm_element_value(msg, F_LRMD_IPC_OP); + if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) { + const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); + + proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); + if (!remote_ra_controlling_guest(lrm_state)) { + if (proxy != NULL) { + cib_t *cib_conn = controld_globals.cib_conn; + + /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ + int rc = cib_conn->cmds->query(cib_conn, XML_CIB_TAG_CRMCONFIG, + NULL, cib_scope_local); + cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE, + lrmd, + "remote_config_check", + remote_config_check, + NULL); + } + } else { + crm_debug("Skipping remote_config_check for guest-nodes"); + } + + } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) { + char *now_s = NULL; + + crm_notice("%s requested shutdown of its remote connection", + lrm_state->node_name); + + if (!remote_ra_is_in_maintenance(lrm_state)) { + now_s = pcmk__ttoa(time(NULL)); + update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE); + free(now_s); + + remote_proxy_ack_shutdown(lrmd); + + crm_warn("Reconnection attempts to %s may result in failures that must be cleared", + lrm_state->node_name); + } else { + remote_proxy_nack_shutdown(lrmd); + + crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", + lrm_state->node_name); + } + return; + + } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) { + /* This is for the controller, which we are, so don't try + * to send to 
ourselves over IPC -- do it directly.
+         */
+        int flags = 0;
+        xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
+
+        CRM_CHECK(request != NULL, return);
+        CRM_CHECK(lrm_state->node_name, return);
+        crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
+        pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
+
+        /* Pacemaker Remote nodes don't know their own names (as known to the
+         * cluster). When getting a node info request with no name or ID, add
+         * the name, so we don't return info for ourselves instead of the
+         * Pacemaker Remote node.
+         */
+        if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK),
+                         CRM_OP_NODE_INFO, pcmk__str_casei)) {
+            int node_id = 0;
+
+            crm_element_value_int(request, XML_ATTR_ID, &node_id);
+            if ((node_id <= 0)
+                && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) {
+                crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name);
+            }
+        }
+
+        crmd_proxy_dispatch(session, request);
+
+        crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
+        if (flags & crm_ipc_client_response) {
+            int msg_id = 0;
+            xmlNode *op_reply = create_xml_node(NULL, "ack");
+
+            crm_xml_add(op_reply, "function", __func__);
+            crm_xml_add_int(op_reply, "line", __LINE__);
+
+            crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
+            remote_proxy_relay_response(proxy, op_reply, msg_id);
+
+            free_xml(op_reply);
+        }
+
+    } else {
+        remote_proxy_cb(lrmd, lrm_state->node_name, msg);
+    }
+}
+
+
+// \return Standard Pacemaker return code
+int
+controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
+                                 int port, int timeout_ms)
+{
+    int rc = pcmk_rc_ok;
+
+    if (lrm_state->conn == NULL) {
+        lrmd_t *api = NULL;
+
+        rc = lrmd__new(&api, lrm_state->node_name, server, port);
+        if (rc != pcmk_rc_ok) {
+            crm_warn("Pacemaker Remote connection to %s:%d failed: %s "
+                     CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc);
+
+            return rc;
+        }
+        lrm_state->conn = api;
+        api->cmds->set_callback(api, remote_lrm_op_callback);
+        lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb);
+    }
+
+    crm_trace("Initiating remote connection to %s:%d with timeout %dms",
+              server, port, timeout_ms);
+    rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn,
+                                                           lrm_state->node_name,
+                                                           timeout_ms);
+    if (rc == pcmk_ok) {
+        lrm_state->num_lrm_register_fails = 0;
+    } else {
+        lrm_state->num_lrm_register_fails++; // Ignored for remote connections
+    }
+    return pcmk_legacy2rc(rc);
+}
+
+int
+lrm_state_get_metadata(lrm_state_t * lrm_state,
+                       const char *class,
+                       const char *provider,
+                       const char *agent, char **output,
+                       enum lrmd_call_options options)
+{
+    lrmd_key_value_t *params = NULL;
+
+    if (!lrm_state->conn) {
+        return -ENOTCONN;
+    }
+
+    /* Add the node name to the environment, as is done with normal resource
+     * action calls. Meta-data calls shouldn't need it, but some agents are
+     * written with an ocf_local_nodename call at the beginning regardless of
+     * action. Without the environment variable, the agent would try to
+     * contact the controller to get the node name -- but the controller would
+     * be blocking on the synchronous meta-data call.
+     *
+     * At this point, we have to assume that agents are unlikely to make other
+     * calls that require the controller, such as crm_node --quorum or
+     * --cluster-id.
+     *
+     * @TODO Make meta-data calls asynchronous. (This will be part of a larger
+     * project to make meta-data calls via the executor rather than directly.)
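+     *
+     * (As a hypothetical illustration of the deadlock avoided here: an agent
+     * whose meta-data action runs "crm_node --name" would block waiting on
+     * the controller, which is itself blocked on this synchronous call.)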
+ */ + params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, + lrm_state->node_name); + + return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, + class, provider, agent, output, options, params); +} + +int +lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, + guint interval_ms) +{ + if (!lrm_state->conn) { + return -ENOTCONN; + } + + /* Figure out a way to make this async? + * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); + } + return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, + action, interval_ms); +} + +lrmd_rsc_info_t * +lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) +{ + lrmd_rsc_info_t *rsc = NULL; + + if (!lrm_state->conn) { + return NULL; + } + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + return remote_ra_get_rsc_info(lrm_state, rsc_id); + } + + rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); + if (rsc == NULL) { + /* only contact the lrmd if we don't already have a cached rsc info */ + rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); + if (rsc == NULL) { + return NULL; + } + /* cache the result */ + g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); + } + + return lrmd_copy_rsc_info(rsc); + +} + +/*! + * \internal + * \brief Initiate a resource agent action + * + * \param[in,out] lrm_state Executor state object + * \param[in] rsc_id ID of resource for action + * \param[in] action Action to execute + * \param[in] userdata String to copy and pass to execution callback + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in] timeout_ms Action timeout (in milliseconds) + * \param[in] start_delay_ms Delay (in ms) before initiating action + * \param[in] parameters Hash table of resource parameters + * \param[out] call_id Where to store call ID on success + * + * \return Standard Pacemaker return code + */ +int +controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, GHashTable *parameters, + int *call_id) +{ + int rc = pcmk_rc_ok; + lrmd_key_value_t *params = NULL; + + if (lrm_state->conn == NULL) { + return ENOTCONN; + } + + // Convert parameters from hash table to list + if (parameters != NULL) { + const char *key = NULL; + const char *value = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, parameters); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + params = lrmd_key_value_add(params, key, value); + } + } + + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + rc = controld_execute_remote_agent(lrm_state, rsc_id, action, + userdata, interval_ms, timeout_ms, + start_delay_ms, params, call_id); + + } else { + rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, + action, userdata, + interval_ms, timeout_ms, + start_delay_ms, + lrmd_opt_notify_changes_only, + params); + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + } else { + *call_id = rc; + rc = pcmk_rc_ok; + } + } + return rc; +} + +int +lrm_state_register_rsc(lrm_state_t * lrm_state, + const char *rsc_id, + const char *class, + const char *provider, const char *agent, enum lrmd_call_options options) +{ + lrmd_t *conn = (lrmd_t *) lrm_state->conn; + + if (conn == NULL) { + 
return -ENOTCONN; + } + + if (is_remote_lrmd_ra(agent, provider, NULL)) { + return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL; + } + + /* @TODO Implement an asynchronous version of this (currently a blocking + * call to the lrmd). + */ + return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, + agent, options); +} + +int +lrm_state_unregister_rsc(lrm_state_t * lrm_state, + const char *rsc_id, enum lrmd_call_options options) +{ + if (!lrm_state->conn) { + return -ENOTCONN; + } + + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + lrm_state_destroy(rsc_id); + return pcmk_ok; + } + + g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id); + + /* @TODO Optimize this ... this function is a blocking round trip from + * client to daemon. The controld_execd_state.c code path that uses this + * function should always treat it as an async operation. The executor API + * should make an async version available. + */ + return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); +} diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c new file mode 100644 index 0000000..89cb61f --- /dev/null +++ b/daemons/controld/controld_fencing.c @@ -0,0 +1,1108 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> + +#include <pacemaker-controld.h> + +static void +tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); + +/* + * stonith failure counting + * + * We don't want to get stuck in a permanent fencing loop. Keep track of the + * number of fencing failures for each target node, and the most we'll restart a + * transition for. + */ + +struct st_fail_rec { + int count; +}; + +static bool fence_reaction_panic = false; +static unsigned long int stonith_max_attempts = 10; +static GHashTable *stonith_failures = NULL; + +/*! + * \internal + * \brief Update max fencing attempts before giving up + * + * \param[in] value New max fencing attempts + */ +static void +update_stonith_max_attempts(const char *value) +{ + stonith_max_attempts = char2score(value); + if (stonith_max_attempts < 1UL) { + stonith_max_attempts = 10UL; + } +} + +/*! + * \internal + * \brief Configure reaction to notification of local node being fenced + * + * \param[in] reaction_s Reaction type + */ +static void +set_fence_reaction(const char *reaction_s) +{ + if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { + fence_reaction_panic = true; + + } else { + if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) { + crm_warn("Invalid value '%s' for %s, using 'stop'", + reaction_s, XML_CONFIG_ATTR_FENCE_REACTION); + } + fence_reaction_panic = false; + } +} + +/*! 
+ * \internal + * \brief Configure fencing options based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_fencing(GHashTable *options) +{ + const char *value = NULL; + + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION); + set_fence_reaction(value); + + value = g_hash_table_lookup(options, "stonith-max-attempts"); + update_stonith_max_attempts(value); +} + +static gboolean +too_many_st_failures(const char *target) +{ + GHashTableIter iter; + const char *key = NULL; + struct st_fail_rec *value = NULL; + + if (stonith_failures == NULL) { + return FALSE; + } + + if (target == NULL) { + g_hash_table_iter_init(&iter, stonith_failures); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + + if (value->count >= stonith_max_attempts) { + target = (const char*)key; + goto too_many; + } + } + } else { + value = g_hash_table_lookup(stonith_failures, target); + if ((value != NULL) && (value->count >= stonith_max_attempts)) { + goto too_many; + } + } + return FALSE; + +too_many: + crm_warn("Too many failures (%d) to fence %s, giving up", + value->count, target); + return TRUE; +} + +/*! + * \internal + * \brief Reset a stonith fail count + * + * \param[in] target Name of node to reset, or NULL for all + */ +void +st_fail_count_reset(const char *target) +{ + if (stonith_failures == NULL) { + return; + } + + if (target) { + struct st_fail_rec *rec = NULL; + + rec = g_hash_table_lookup(stonith_failures, target); + if (rec) { + rec->count = 0; + } + } else { + GHashTableIter iter; + const char *key = NULL; + struct st_fail_rec *rec = NULL; + + g_hash_table_iter_init(&iter, stonith_failures); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &rec)) { + rec->count = 0; + } + } +} + +static void +st_fail_count_increment(const char *target) +{ + struct st_fail_rec *rec = NULL; + + if (stonith_failures == NULL) { + stonith_failures = pcmk__strkey_table(free, free); + } + + rec = g_hash_table_lookup(stonith_failures, target); + if (rec) { + rec->count++; + } else { + rec = malloc(sizeof(struct st_fail_rec)); + if(rec == NULL) { + return; + } + + rec->count = 1; + g_hash_table_insert(stonith_failures, strdup(target), rec); + } +} + +/* end stonith fail count functions */ + + +static void +cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + if (rc < pcmk_ok) { + crm_err("Fencing update %d for %s: failed - %s (%d)", + call_id, (char *)user_data, pcmk_strerror(rc), rc); + crm_log_xml_warn(msg, "Failed update"); + abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed", + NULL); + + } else { + crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); + } +} + +static void +send_stonith_update(pcmk__graph_action_t *action, const char *target, + const char *uuid) +{ + int rc = pcmk_ok; + crm_node_t *peer = NULL; + + /* We (usually) rely on the membership layer to do node_update_cluster, + * and the peer status callback to do node_update_peer, because the node + * might have already rejoined before we get the stonith result here. 
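+     *
+     * Accordingly, node_update_cluster is left out of the flags below by
+     * default, and is added only when the node has never been seen
+     * (peer->state == NULL), so that the node is not considered unclean.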
+ */ + int flags = node_update_join | node_update_expected; + + /* zero out the node-status & remove all LRM status info */ + xmlNode *node_state = NULL; + + CRM_CHECK(target != NULL, return); + CRM_CHECK(uuid != NULL, return); + + /* Make sure the membership and join caches are accurate */ + peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); + + CRM_CHECK(peer != NULL, return); + + if (peer->state == NULL) { + /* Usually, we rely on the membership layer to update the cluster state + * in the CIB. However, if the node has never been seen, do it here, so + * the node is not considered unclean. + */ + flags |= node_update_cluster; + } + + if (peer->uuid == NULL) { + crm_info("Recording uuid '%s' for node '%s'", uuid, target); + peer->uuid = strdup(uuid); + } + + crmd_peer_down(peer, TRUE); + + /* Generate a node state update for the CIB */ + node_state = create_node_state_update(peer, flags, NULL, __func__); + + /* we have to mark whether or not remote nodes have already been fenced */ + if (peer->flags & crm_remote_node) { + char *now_s = pcmk__ttoa(time(NULL)); + + crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); + free(now_s); + } + + /* Force our known ID */ + crm_xml_add(node_state, XML_ATTR_ID, uuid); + + rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, + XML_CIB_TAG_STATUS, node_state, + cib_scope_local + |cib_can_create); + + /* Delay processing the trigger until the update completes */ + crm_debug("Sending fencing update %d for %s", rc, target); + fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated); + + // Make sure it sticks + /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, + * cib_scope_local); + */ + + controld_delete_node_state(peer->uname, controld_section_all, + cib_scope_local); + free_xml(node_state); + return; +} + +/*! + * \internal + * \brief Abort transition due to stonith failure + * + * \param[in] abort_action Whether to restart or stop transition + * \param[in] target Don't restart if this (NULL for any) has too many failures + * \param[in] reason Log this stonith action XML as abort reason (or NULL) + */ +static void +abort_for_stonith_failure(enum pcmk__graph_next abort_action, + const char *target, const xmlNode *reason) +{ + /* If stonith repeatedly fails, we eventually give up on starting a new + * transition for that reason. + */ + if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { + abort_action = pcmk__graph_wait; + } + abort_transition(INFINITY, abort_action, "Stonith failed", reason); +} + + +/* + * stonith cleanup list + * + * If the DC is shot, proper notifications might not go out. + * The stonith cleanup list allows the cluster to (re-)send + * notifications once a new DC is elected. + */ + +static GList *stonith_cleanup_list = NULL; + +/*! + * \internal + * \brief Add a node to the stonith cleanup list + * + * \param[in] target Name of node to add + */ +void +add_stonith_cleanup(const char *target) { + stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); +} + +/*! 
+ * \internal
+ * \brief Remove a node from the stonith cleanup list
+ *
+ * \param[in] target  Name of node to remove
+ */
+void
+remove_stonith_cleanup(const char *target)
+{
+    GList *iter = stonith_cleanup_list;
+
+    while (iter != NULL) {
+        GList *tmp = iter;
+        char *iter_name = tmp->data;
+
+        iter = iter->next;
+        if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
+            crm_trace("Removing %s from the cleanup list", iter_name);
+            stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list,
+                                                      tmp);
+            free(iter_name);
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Purge all entries from the stonith cleanup list
+ */
+void
+purge_stonith_cleanup(void)
+{
+    if (stonith_cleanup_list) {
+        GList *iter = NULL;
+
+        for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+            char *target = iter->data;
+
+            crm_info("Purging %s from stonith cleanup list", target);
+            free(target);
+        }
+        g_list_free(stonith_cleanup_list);
+        stonith_cleanup_list = NULL;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Send stonith updates for all entries in cleanup list, then purge it
+ */
+void
+execute_stonith_cleanup(void)
+{
+    GList *iter;
+
+    for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+        char *target = iter->data;
+        crm_node_t *target_node = crm_get_peer(0, target);
+        const char *uuid = crm_peer_uuid(target_node);
+
+        crm_notice("Marking %s, target of a previous stonith action, as clean",
+                   target);
+        send_stonith_update(NULL, target, uuid);
+        free(target);
+    }
+    g_list_free(stonith_cleanup_list);
+    stonith_cleanup_list = NULL;
+}
+
+/* end stonith cleanup list functions */
+
+
+/* stonith API client
+ *
+ * Functions that need to interact directly with the fencer via its API
+ */
+
+static stonith_t *stonith_api = NULL;
+static crm_trigger_t *stonith_reconnect = NULL;
+static char *te_client_id = NULL;
+
+static gboolean
+fail_incompletable_stonith(pcmk__graph_t *graph)
+{
+    GList *lpc = NULL;
+    const char *task = NULL;
+    xmlNode *last_action = NULL;
+
+    if (graph == NULL) {
+        return FALSE;
+    }
+
+    for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+        GList *lpc2 = NULL;
+        pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+            continue;
+        }
+
+        for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
+            pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
+
+            if ((action->type != pcmk__cluster_graph_action)
+                || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+                continue;
+            }
+
+            task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+            if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+                pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+                last_action = action->xml;
+                pcmk__update_graph(graph, action);
+                crm_notice("Failing action %d (%s): fencer terminated",
+                           action->id, ID(action->xml));
+            }
+        }
+    }
+
+    if (last_action != NULL) {
+        crm_warn("Fencer failure resulted in unrunnable actions");
+        abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+static void
+tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
+{
+    te_cleanup_stonith_history_sync(st, FALSE);
+
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
+        crm_crit("Fencing daemon connection failed");
+        mainloop_set_trigger(stonith_reconnect);
+
+    } else {
+        crm_info("Fencing daemon disconnected");
+    }
+
+    if (stonith_api) {
+        /* the client API won't properly reconnect notifications
+         * if they
are still in the table - so remove them + */ + if (stonith_api->state != stonith_disconnected) { + stonith_api->cmds->disconnect(st); + } + stonith_api->cmds->remove_notification(stonith_api, NULL); + } + + if (AM_I_DC) { + fail_incompletable_stonith(controld_globals.transition_graph); + trigger_graph(); + } +} + +/*! + * \internal + * \brief Handle an event notification from the fencing API + * + * \param[in] st Fencing API connection (ignored) + * \param[in] event Fencing API event notification + */ +static void +handle_fence_notification(stonith_t *st, stonith_event_t *event) +{ + bool succeeded = true; + const char *executioner = "the cluster"; + const char *client = "a client"; + const char *reason = NULL; + int exec_status; + + if (te_client_id == NULL) { + te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, + (unsigned long) getpid()); + } + + if (event == NULL) { + crm_err("Notify data not found"); + return; + } + + if (event->executioner != NULL) { + executioner = event->executioner; + } + if (event->client_origin != NULL) { + client = event->client_origin; + } + + exec_status = stonith__event_execution_status(event); + if ((stonith__event_exit_status(event) != CRM_EX_OK) + || (exec_status != PCMK_EXEC_DONE)) { + succeeded = false; + if (exec_status == PCMK_EXEC_DONE) { + exec_status = PCMK_EXEC_ERROR; + } + } + reason = stonith__event_exit_reason(event); + + crmd_alert_fencing_op(event); + + if (pcmk__str_eq("on", event->action, pcmk__str_none)) { + // Unfencing doesn't need special handling, just a log message + if (succeeded) { + crm_notice("%s was unfenced by %s at the request of %s@%s", + event->target, executioner, client, event->origin); + } else { + crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", + event->target, executioner, + pcmk_exec_status_str(exec_status), + ((reason == NULL)? "" : ": "), + ((reason == NULL)? "" : reason), + stonith__event_exit_status(event)); + } + return; + } + + if (succeeded + && pcmk__str_eq(event->target, controld_globals.our_nodename, + pcmk__str_casei)) { + /* We were notified of our own fencing. Most likely, either fencing was + * misconfigured, or fabric fencing that doesn't cut cluster + * communication is in use. + * + * Either way, shutting down the local host is a good idea, to require + * administrator intervention. Also, other nodes would otherwise likely + * set our status to lost because of the fencing callback and discard + * our subsequent election votes as "not part of our cluster". + */ + crm_crit("We were allegedly just fenced by %s for %s!", + executioner, event->origin); // Dumps blackbox if enabled + if (fence_reaction_panic) { + pcmk__panic(__func__); + } else { + crm_exit(CRM_EX_FATAL); + } + return; // Should never get here + } + + /* Update the count of fencing failures for this target, in case we become + * DC later. The current DC has already updated its fail count in + * tengine_stonith_callback(). + */ + if (!AM_I_DC) { + if (succeeded) { + st_fail_count_reset(event->target); + } else { + st_fail_count_increment(event->target); + } + } + + crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " + "%s%s%s%s " CRM_XS " event=%s", + event->target, (succeeded? "" : " not"), + event->action, executioner, client, event->origin, + (succeeded? "OK" : pcmk_exec_status_str(exec_status)), + ((reason == NULL)? "" : " ("), + ((reason == NULL)? "" : reason), + ((reason == NULL)? 
"" : ")"), + event->id); + + if (succeeded) { + crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, + CRM_GET_PEER_ANY); + const char *uuid = NULL; + + if (peer == NULL) { + return; + } + + uuid = crm_peer_uuid(peer); + + if (AM_I_DC) { + /* The DC always sends updates */ + send_stonith_update(NULL, event->target, uuid); + + /* @TODO Ideally, at this point, we'd check whether the fenced node + * hosted any guest nodes, and call remote_node_down() for them. + * Unfortunately, the controller doesn't have a simple, reliable way + * to map hosts to guests. It might be possible to track this in the + * peer cache via crm_remote_peer_cache_refresh(). For now, we rely + * on the scheduler creating fence pseudo-events for the guests. + */ + + if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { + /* Abort the current transition if it wasn't the cluster that + * initiated fencing. + */ + crm_info("External fencing operation from %s fenced %s", + client, event->target); + abort_transition(INFINITY, pcmk__graph_restart, + "External Fencing Operation", NULL); + } + + } else if (pcmk__str_eq(controld_globals.dc_name, event->target, + pcmk__str_null_matches|pcmk__str_casei) + && !pcmk_is_set(peer->flags, crm_remote_node)) { + // Assume the target was our DC if we don't currently have one + + if (controld_globals.dc_name != NULL) { + crm_notice("Fencing target %s was our DC", event->target); + } else { + crm_notice("Fencing target %s may have been our DC", + event->target); + } + + /* Given the CIB resyncing that occurs around elections, + * have one node update the CIB now and, if the new DC is different, + * have them do so too after the election + */ + if (pcmk__str_eq(event->executioner, controld_globals.our_nodename, + pcmk__str_casei)) { + send_stonith_update(NULL, event->target, uuid); + } + add_stonith_cleanup(event->target); + } + + /* If the target is a remote node, and we host its connection, + * immediately fail all monitors so it can be recovered quickly. + * The connection won't necessarily drop when a remote node is fenced, + * so the failure might not otherwise be detected until the next poke. + */ + if (pcmk_is_set(peer->flags, crm_remote_node)) { + remote_ra_fail(event->target); + } + + crmd_peer_down(peer, TRUE); + } +} + +/*! + * \brief Connect to fencer + * + * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop + * + * \return TRUE + * \note If user_data is NULL, this will wait 2s between attempts, for up to + * 30 attempts, meaning the controller could be blocked as long as 58s. 
+ */ +static gboolean +te_connect_stonith(gpointer user_data) +{ + int rc = pcmk_ok; + + if (stonith_api == NULL) { + stonith_api = stonith_api_new(); + if (stonith_api == NULL) { + crm_err("Could not connect to fencer: API memory allocation failed"); + return TRUE; + } + } + + if (stonith_api->state != stonith_disconnected) { + crm_trace("Already connected to fencer, no need to retry"); + return TRUE; + } + + if (user_data == NULL) { + // Blocking (retry failures now until successful) + rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); + if (rc != pcmk_ok) { + crm_err("Could not connect to fencer in 30 attempts: %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + } + } else { + // Non-blocking (retry failures later in main loop) + rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); + if (rc != pcmk_ok) { + if (pcmk_is_set(controld_globals.fsa_input_register, + R_ST_REQUIRED)) { + crm_notice("Fencer connection failed (will retry): %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + mainloop_set_trigger(stonith_reconnect); + } else { + crm_info("Fencer connection failed (ignoring because no longer required): %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + } + return TRUE; + } + } + + if (rc == pcmk_ok) { + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_DISCONNECT, + tengine_stonith_connection_destroy); + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_FENCE, + handle_fence_notification); + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_HISTORY_SYNCED, + tengine_stonith_history_synced); + te_trigger_stonith_history_sync(TRUE); + crm_notice("Fencer successfully connected"); + } + + return TRUE; +} + +/*! + \internal + \brief Schedule fencer connection attempt in main loop +*/ +void +controld_trigger_fencer_connect(void) +{ + if (stonith_reconnect == NULL) { + stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, + te_connect_stonith, + GINT_TO_POINTER(TRUE)); + } + controld_set_fsa_input_flags(R_ST_REQUIRED); + mainloop_set_trigger(stonith_reconnect); +} + +void +controld_disconnect_fencer(bool destroy) +{ + if (stonith_api) { + // Prevent fencer connection from coming up again + controld_clear_fsa_input_flags(R_ST_REQUIRED); + + if (stonith_api->state != stonith_disconnected) { + stonith_api->cmds->disconnect(stonith_api); + } + stonith_api->cmds->remove_notification(stonith_api, NULL); + } + if (destroy) { + if (stonith_api) { + stonith_api->cmds->free(stonith_api); + stonith_api = NULL; + } + if (stonith_reconnect) { + mainloop_destroy_trigger(stonith_reconnect); + stonith_reconnect = NULL; + } + if (te_client_id) { + free(te_client_id); + te_client_id = NULL; + } + } +} + +static gboolean +do_stonith_history_sync(gpointer user_data) +{ + if (stonith_api && (stonith_api->state != stonith_disconnected)) { + stonith_history_t *history = NULL; + + te_cleanup_stonith_history_sync(stonith_api, FALSE); + stonith_api->cmds->history(stonith_api, + st_opt_sync_call | st_opt_broadcast, + NULL, &history, 5); + stonith_history_free(history); + return TRUE; + } else { + crm_info("Skip triggering stonith history-sync as stonith is disconnected"); + return FALSE; + } +} + +static void +tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) +{ + char *uuid = NULL; + int stonith_id = -1; + int transition_id = -1; + pcmk__graph_action_t *action = NULL; + const char *target = NULL; + + if ((data == NULL) || (data->userdata == NULL)) { + crm_err("Ignoring fence operation %d 
result: " + "No transition key given (bug?)", + ((data == NULL)? -1 : data->call_id)); + return; + } + + if (!AM_I_DC) { + const char *reason = stonith__exit_reason(data); + + if (reason == NULL) { + reason = pcmk_exec_status_str(stonith__execution_status(data)); + } + crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", + data->call_id, stonith__exit_status(data), reason, + (const char *) data->userdata); + return; + } + + CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, + &stonith_id, NULL), + goto bail); + + if (controld_globals.transition_graph->complete || (stonith_id < 0) + || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) + || (controld_globals.transition_graph->id != transition_id)) { + crm_info("Ignoring fence operation %d result: " + "Not from current transition " CRM_XS + " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", + data->call_id, + pcmk__btoa(controld_globals.transition_graph->complete), + stonith_id, uuid, controld_globals.te_uuid, transition_id, + controld_globals.transition_graph->id); + goto bail; + } + + action = controld_get_action(stonith_id); + if (action == NULL) { + crm_err("Ignoring fence operation %d result: " + "Action %d not found in transition graph (bug?) " + CRM_XS " uuid=%s transition=%d", + data->call_id, stonith_id, uuid, transition_id); + goto bail; + } + + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + if (target == NULL) { + crm_err("Ignoring fence operation %d result: No target given (bug?)", + data->call_id); + goto bail; + } + + stop_te_timer(action); + if (stonith__exit_status(data) == CRM_EX_OK) { + const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + const char *op = crm_meta_value(action->params, "stonith_action"); + + crm_info("Fence operation %d for %s succeeded", data->call_id, target); + if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { + te_action_confirmed(action, NULL); + if (pcmk__str_eq("on", op, pcmk__str_casei)) { + const char *value = NULL; + char *now = pcmk__ttoa(time(NULL)); + gboolean is_remote_node = FALSE; + + /* This check is not 100% reliable, since this node is not + * guaranteed to have the remote node cached. However, it + * doesn't have to be reliable, since the attribute manager can + * learn a node's "remoteness" by other means sooner or later. + * This allows it to learn more quickly if this node does have + * the information. 
+ */ + if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { + is_remote_node = TRUE; + } + + update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, + is_remote_node); + free(now); + + value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); + update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, + is_remote_node); + + value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); + update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, + is_remote_node); + + } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { + send_stonith_update(action, target, uuid); + pcmk__set_graph_action_flags(action, + pcmk__graph_action_sent_update); + } + } + st_fail_count_reset(target); + + } else { + enum pcmk__graph_next abort_action = pcmk__graph_restart; + int status = stonith__execution_status(data); + const char *reason = stonith__exit_reason(data); + + if (reason == NULL) { + if (status == PCMK_EXEC_DONE) { + reason = "Agent returned error"; + } else { + reason = pcmk_exec_status_str(status); + } + } + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + + /* If no fence devices were available, there's no use in immediately + * checking again, so don't start a new transition in that case. + */ + if (status == PCMK_EXEC_NO_FENCE_DEVICE) { + crm_warn("Fence operation %d for %s failed: %s " + "(aborting transition and giving up for now)", + data->call_id, target, reason); + abort_action = pcmk__graph_wait; + } else { + crm_notice("Fence operation %d for %s failed: %s " + "(aborting transition)", data->call_id, target, reason); + } + + /* Increment the fail count now, so abort_for_stonith_failure() can + * check it. Non-DC nodes will increment it in + * handle_fence_notification(). + */ + st_fail_count_increment(target); + abort_for_stonith_failure(abort_action, target, NULL); + } + + pcmk__update_graph(controld_globals.transition_graph, action); + trigger_graph(); + + bail: + free(data->userdata); + free(uuid); + return; +} + +static int +fence_with_delay(const char *target, const char *type, int delay) +{ + uint32_t options = st_opt_none; // Group of enum stonith_call_options + int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout + / 1000); + + if (crmd_join_phase_count(crm_join_confirmed) == 1) { + stonith__set_call_options(options, target, st_opt_allow_suicide); + } + return stonith_api->cmds->fence_with_delay(stonith_api, options, target, + type, timeout_sec, 0, delay); +} + +/*! 
+ * \internal
+ * \brief Execute a fencing action from a transition graph
+ *
+ * \param[in] graph   Transition graph being executed (ignored)
+ * \param[in] action  Fencing action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+controld_execute_fence_action(pcmk__graph_t *graph,
+                              pcmk__graph_action_t *action)
+{
+    int rc = 0;
+    const char *id = ID(action->xml);
+    const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+    const char *type = crm_meta_value(action->params, "stonith_action");
+    char *transition_key = NULL;
+    const char *priority_delay = NULL;
+    int delay_i = 0;
+    gboolean invalid_action = FALSE;
+    int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
+                                 / 1000);
+
+    CRM_CHECK(id != NULL, invalid_action = TRUE);
+    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
+    CRM_CHECK(type != NULL, invalid_action = TRUE);
+    CRM_CHECK(target != NULL, invalid_action = TRUE);
+
+    if (invalid_action) {
+        crm_log_xml_warn(action->xml, "BadAction");
+        return EPROTO;
+    }
+
+    priority_delay = crm_meta_value(action->params,
+                                    XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
+
+    crm_notice("Requesting fencing (%s) targeting node %s "
+               CRM_XS " action=%s timeout=%i%s%s",
+               type, target, id, stonith_timeout,
+               priority_delay ? " priority_delay=" : "",
+               priority_delay ? priority_delay : "");
+
+    /* Passing NULL means block until we can connect... */
+    te_connect_stonith(NULL);
+
+    pcmk__scan_min_int(priority_delay, &delay_i, 0);
+    rc = fence_with_delay(target, type, delay_i);
+    transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
+                                          action->id, 0,
+                                          controld_globals.te_uuid);
+    stonith_api->cmds->register_callback(stonith_api, rc,
+                                         (stonith_timeout
+                                          + (delay_i > 0 ? delay_i : 0)),
+                                         st_opt_timeout_updates, transition_key,
+                                         "tengine_stonith_callback",
+                                         tengine_stonith_callback);
+    return pcmk_rc_ok;
+}
+
+bool
+controld_verify_stonith_watchdog_timeout(const char *value)
+{
+    const char *our_nodename = controld_globals.our_nodename;
+    gboolean rv = TRUE;
+
+    if (stonith_api && (stonith_api->state != stonith_disconnected) &&
+        stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
+                                                       our_nodename)) {
+        rv = pcmk__valid_sbd_timeout(value);
+    }
+    return rv;
+}
+
+/* end stonith API client functions */
+
+
+/*
+ * stonith history synchronization
+ *
+ * Each node's fencer keeps track of a cluster-wide fencing history. When a
+ * node joins or leaves, we need to synchronize the history across all nodes.
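+ *
+ * Two timers drive this: a short 5s timer batches locally triggered syncs so
+ * that late-joining nodes are included, and a long 30s timer serves as a
+ * fallback when no DC-triggered sync arrives; see
+ * te_trigger_stonith_history_sync() below.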
+ */ + +static crm_trigger_t *stonith_history_sync_trigger = NULL; +static mainloop_timer_t *stonith_history_sync_timer_short = NULL; +static mainloop_timer_t *stonith_history_sync_timer_long = NULL; + +void +te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) +{ + if (free_timers) { + mainloop_timer_del(stonith_history_sync_timer_short); + stonith_history_sync_timer_short = NULL; + mainloop_timer_del(stonith_history_sync_timer_long); + stonith_history_sync_timer_long = NULL; + } else { + mainloop_timer_stop(stonith_history_sync_timer_short); + mainloop_timer_stop(stonith_history_sync_timer_long); + } + + if (st) { + st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); + } +} + +static void +tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) +{ + te_cleanup_stonith_history_sync(st, FALSE); + crm_debug("Fence-history synced - cancel all timers"); +} + +static gboolean +stonith_history_sync_set_trigger(gpointer user_data) +{ + mainloop_set_trigger(stonith_history_sync_trigger); + return FALSE; +} + +void +te_trigger_stonith_history_sync(bool long_timeout) +{ + /* trigger a sync in 5s to give more nodes the + * chance to show up so that we don't create + * unnecessary stonith-history-sync traffic + * + * the long timeout of 30s is there as a fallback + * so that after a successful connection to fenced + * we will wait for 30s for the DC to trigger a + * history-sync + * if this doesn't happen we trigger a sync locally + * (e.g. fenced segfaults and is restarted by pacemakerd) + */ + + /* as we are finally checking the stonith-connection + * in do_stonith_history_sync we should be fine + * leaving stonith_history_sync_time & stonith_history_sync_trigger + * around + */ + if (stonith_history_sync_trigger == NULL) { + stonith_history_sync_trigger = + mainloop_add_trigger(G_PRIORITY_LOW, + do_stonith_history_sync, NULL); + } + + if (long_timeout) { + if(stonith_history_sync_timer_long == NULL) { + stonith_history_sync_timer_long = + mainloop_timer_add("history_sync_long", 30000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); + mainloop_timer_start(stonith_history_sync_timer_long); + } else { + if(stonith_history_sync_timer_short == NULL) { + stonith_history_sync_timer_short = + mainloop_timer_add("history_sync_short", 5000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); + mainloop_timer_start(stonith_history_sync_timer_short); + } + +} + +/* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h new file mode 100644 index 0000000..86a5050 --- /dev/null +++ b/daemons/controld/controld_fencing.h @@ -0,0 +1,38 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CONTROLD_FENCING__H +# define CONTROLD_FENCING__H + +#include <stdbool.h> // bool +#include <pacemaker-internal.h> // pcmk__graph_t, pcmk__graph_action_t + +void controld_configure_fencing(GHashTable *options); + +// stonith fail counts +void st_fail_count_reset(const char * target); + +// stonith API client +void controld_trigger_fencer_connect(void); +void controld_disconnect_fencer(bool destroy); +int controld_execute_fence_action(pcmk__graph_t *graph, + pcmk__graph_action_t *action); +bool controld_verify_stonith_watchdog_timeout(const char *value); + +// stonith cleanup list +void add_stonith_cleanup(const char *target); +void remove_stonith_cleanup(const char *target); +void purge_stonith_cleanup(void); +void execute_stonith_cleanup(void); + +// stonith history synchronization +void te_trigger_stonith_history_sync(bool long_timeout); +void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers); + +#endif diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c new file mode 100644 index 0000000..622d1c8 --- /dev/null +++ b/daemons/controld/controld_fsa.c @@ -0,0 +1,741 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <stdio.h> +#include <stdint.h> // uint64_t +#include <string.h> +#include <time.h> + +#include <crm/crm.h> +#include <crm/lrmd.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/cluster/election_internal.h> +#include <crm/cluster.h> + +#include <pacemaker-controld.h> + +//! Triggers an FSA invocation +static crm_trigger_t *fsa_trigger = NULL; + +#define DOT_PREFIX "actions:trace: " +#define do_dot_log(fmt, args...) 
crm_trace( fmt, ##args) + +static void do_state_transition(enum crmd_fsa_state cur_state, + enum crmd_fsa_state next_state, + fsa_data_t *msg_data); + +void s_crmd_fsa_actions(fsa_data_t * fsa_data); +void log_fsa_input(fsa_data_t * stored_msg); +void init_dotfile(void); + +void +init_dotfile(void) +{ + do_dot_log(DOT_PREFIX "digraph \"g\" {"); + do_dot_log(DOT_PREFIX " size = \"30,30\""); + do_dot_log(DOT_PREFIX " graph ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " node ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " shape = \"ellipse\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " edge ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX "// special nodes"); + do_dot_log(DOT_PREFIX " \"S_PENDING\" "); + do_dot_log(DOT_PREFIX " ["); + do_dot_log(DOT_PREFIX " color = \"blue\""); + do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); + do_dot_log(DOT_PREFIX " ["); + do_dot_log(DOT_PREFIX " color = \"red\""); + do_dot_log(DOT_PREFIX " fontcolor = \"red\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX "// DC only nodes"); + do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); +} + +static void +do_fsa_action(fsa_data_t * fsa_data, long long an_action, + void (*function) (long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) +{ + controld_clear_fsa_action_flags(an_action); + crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); + function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state, + fsa_data->fsa_input, fsa_data); +} + +static const uint64_t startup_actions = + A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | + A_STARTED | A_CL_JOIN_QUERY; + +// A_LOG, A_WARN, A_ERROR +void +do_log(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data) +{ + unsigned log_type = LOG_TRACE; + + if (action & A_LOG) { + log_type = LOG_INFO; + } else if (action & A_WARN) { + log_type = LOG_WARNING; + } else if (action & A_ERROR) { + log_type = LOG_ERR; + } + + do_crm_log(log_type, "Input %s received in state %s from %s", + fsa_input2string(msg_data->fsa_input), + fsa_state2string(cur_state), msg_data->origin); + + if (msg_data->data_type == fsa_dt_ha_msg) { + ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); + + crm_log_xml_debug(input->msg, __func__); + + } else if (msg_data->data_type == fsa_dt_xml) { + xmlNode *input = 
fsa_typed_data(msg_data->data_type); + + crm_log_xml_debug(input, __func__); + + } else if (msg_data->data_type == fsa_dt_lrm) { + lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); + + do_crm_log(log_type, + "Resource %s: Call ID %d returned %d (%d)." + " New status if rc=0: %s", + input->rsc_id, input->call_id, input->rc, + input->op_status, (char *)input->user_data); + } +} + +/*! + * \internal + * \brief Initialize the FSA trigger + */ +void +controld_init_fsa_trigger(void) +{ + fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); +} + +/*! + * \internal + * \brief Destroy the FSA trigger + */ +void +controld_destroy_fsa_trigger(void) +{ + // This basically will not work, since mainloop has a reference to it + mainloop_destroy_trigger(fsa_trigger); + fsa_trigger = NULL; +} + +/*! + * \internal + * \brief Trigger an FSA invocation + * + * \param[in] fn Calling function name + * \param[in] line Line number where call occurred + */ +void +controld_trigger_fsa_as(const char *fn, int line) +{ + if (fsa_trigger != NULL) { + crm_trace("%s:%d - Triggered FSA invocation", fn, line); + mainloop_set_trigger(fsa_trigger); + } +} + +enum crmd_fsa_state +s_crmd_fsa(enum crmd_fsa_cause cause) +{ + controld_globals_t *globals = &controld_globals; + fsa_data_t *fsa_data = NULL; + uint64_t register_copy = controld_globals.fsa_input_register; + uint64_t new_actions = A_NOTHING; + enum crmd_fsa_state last_state; + + crm_trace("FSA invoked with Cause: %s\tState: %s", + fsa_cause2string(cause), + fsa_state2string(globals->fsa_state)); + + fsa_dump_actions(controld_globals.fsa_actions, "Initial"); + + controld_clear_global_flags(controld_fsa_is_stalled); + if ((controld_globals.fsa_message_queue == NULL) + && (controld_globals.fsa_actions != A_NOTHING)) { + /* fake the first message so we can get into the loop */ + fsa_data = calloc(1, sizeof(fsa_data_t)); + fsa_data->fsa_input = I_NULL; + fsa_data->fsa_cause = C_FSA_INTERNAL; + fsa_data->origin = __func__; + fsa_data->data_type = fsa_dt_none; + controld_globals.fsa_message_queue + = g_list_append(controld_globals.fsa_message_queue, fsa_data); + fsa_data = NULL; + } + while ((controld_globals.fsa_message_queue != NULL) + && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + crm_trace("Checking messages (%d remaining)", + g_list_length(controld_globals.fsa_message_queue)); + + fsa_data = get_message(); + if(fsa_data == NULL) { + continue; + } + + log_fsa_input(fsa_data); + + /* add any actions back to the queue */ + controld_set_fsa_action_flags(fsa_data->actions); + fsa_dump_actions(fsa_data->actions, "Restored actions"); + + /* get the next batch of actions */ + new_actions = controld_fsa_get_action(fsa_data->fsa_input); + controld_set_fsa_action_flags(new_actions); + fsa_dump_actions(new_actions, "New actions"); + + if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { + crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", + fsa_input2string(fsa_data->fsa_input), + fsa_state2string(globals->fsa_state), + fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); + } + + /* logging : *before* the state is changed */ + if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { + do_fsa_action(fsa_data, A_ERROR, do_log); + } + if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { + do_fsa_action(fsa_data, A_WARN, do_log); + } + if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { + do_fsa_action(fsa_data, A_LOG, do_log); + } + + /* update state variables */ + last_state = 
globals->fsa_state; + globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input); + + /* + * Remove certain actions during shutdown + */ + if ((globals->fsa_state == S_STOPPING) + || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + controld_clear_fsa_action_flags(startup_actions); + } + + /* + * Hook for change of state. + * Allows actions to be added or removed when entering a state + */ + if (last_state != globals->fsa_state) { + do_state_transition(last_state, globals->fsa_state, fsa_data); + } else { + do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" + " \tInput=%s \tOrigin=%s() \tid=%d", + fsa_state2string(globals->fsa_state), + fsa_cause2string(fsa_data->fsa_cause), + fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); + } + + /* start doing things... */ + s_crmd_fsa_actions(fsa_data); + delete_fsa_input(fsa_data); + fsa_data = NULL; + } + + if ((controld_globals.fsa_message_queue != NULL) + || (controld_globals.fsa_actions != A_NOTHING) + || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + + crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s", + g_list_length(controld_globals.fsa_message_queue), + (unsigned long long) controld_globals.fsa_actions, + pcmk__btoa(pcmk_is_set(controld_globals.flags, + controld_fsa_is_stalled))); + } else { + crm_trace("Exiting the FSA"); + } + + /* cleanup inputs? */ + if (register_copy != controld_globals.fsa_input_register) { + uint64_t same = register_copy & controld_globals.fsa_input_register; + + fsa_dump_inputs(LOG_DEBUG, "Added", + controld_globals.fsa_input_register ^ same); + fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); + } + + fsa_dump_actions(controld_globals.fsa_actions, "Remaining"); + fsa_dump_queue(LOG_DEBUG); + + return globals->fsa_state; +} + +void +s_crmd_fsa_actions(fsa_data_t * fsa_data) +{ + /* + * Process actions in order of priority but do only one + * action at a time to avoid complicating the ordering. + */ + CRM_CHECK(fsa_data != NULL, return); + while ((controld_globals.fsa_actions != A_NOTHING) + && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + + /* regular action processing in order of action priority + * + * Make sure all actions that connect to required systems + * are performed first + */ + if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { + do_fsa_action(fsa_data, A_ERROR, do_log); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { + do_fsa_action(fsa_data, A_WARN, do_log); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { + do_fsa_action(fsa_data, A_LOG, do_log); + + /* get out of here NOW! 
before anything worse happens */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) { + do_fsa_action(fsa_data, A_EXIT_1, do_exit); + + /* sub-system restart */ + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_LRM_RECONNECT)) { + do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_CIB_RESTART)) { + do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_PE_RESTART)) { + do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_TE_RESTART)) { + do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); + + /* essential start tasks */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) { + do_fsa_action(fsa_data, A_STARTUP, do_startup); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) { + do_fsa_action(fsa_data, A_CIB_START, do_cib_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) { + do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) { + do_fsa_action(fsa_data, A_READCONFIG, do_read_config); + + /* sub-system start/connect */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) { + do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) { + do_fsa_action(fsa_data, A_TE_START, do_te_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) { + do_fsa_action(fsa_data, A_PE_START, do_pe_control); + + /* Timers */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) { + do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_INTEGRATE_TIMER_STOP)) { + do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_INTEGRATE_TIMER_START)) { + do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_FINALIZE_TIMER_STOP)) { + do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_FINALIZE_TIMER_START)) { + do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); + + /* + * Highest priority actions + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) { + do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) { + do_fsa_action(fsa_data, A_RECOVER, do_recover); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_RESULT)) { + do_fsa_action(fsa_data, A_CL_JOIN_RESULT, + do_cl_join_finalize_respond); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_REQUEST)) { + do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, + do_cl_join_offer_respond); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) { + do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) { + do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_COUNT)) { + do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) { + 
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); + + /* + * High priority actions + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) { + do_fsa_action(fsa_data, A_STARTED, do_started); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) { + do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_TIMER_START)) { + do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); + + /* + * Medium priority actions + * - Membership + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) { + do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) { + do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) { + do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_CHECK)) { + do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_START)) { + do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_OFFER_ALL)) { + do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_OFFER_ONE)) { + do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_PROCESS_REQ)) { + do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, + do_dc_join_filter_offer); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_PROCESS_ACK)) { + do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_FINALIZE)) { + do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_ANNOUNCE)) { + do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); + + /* + * Low(er) priority actions + * Make sure the CIB is always updated before invoking the + * scheduler, and the scheduler before the transition engine. 
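             *
             * (Editor's note, not upstream text: as an illustration of the
             * effect, if A_LRM_INVOKE, A_PE_INVOKE and A_TE_INVOKE are all
             * pending at once, the chain below dispatches A_LRM_INVOKE
             * first, then A_PE_INVOKE, and only then A_TE_INVOKE, so each
             * transition run consumes the scheduler's newest result.)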
+ */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) { + do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) { + do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) { + do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) { + do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) { + do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); + + /* Shutdown actions */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) { + do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) { + do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) { + do_fsa_action(fsa_data, A_TE_STOP, do_te_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) { + do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_LRM_DISCONNECT)) { + do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) { + do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) { + do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) { + do_fsa_action(fsa_data, A_STOP, do_stop); + + /* exit gracefully */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) { + do_fsa_action(fsa_data, A_EXIT_0, do_exit); + + /* Error checking and reporting */ + } else { + crm_err("Action %s not supported "CRM_XS" %#llx", + fsa_action2string(controld_globals.fsa_actions), + (unsigned long long) controld_globals.fsa_actions); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, + __func__); + } + } +} + +void +log_fsa_input(fsa_data_t * stored_msg) +{ + CRM_ASSERT(stored_msg); + crm_trace("Processing queued input %d", stored_msg->id); + if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { + crm_trace("FSA processing LRM callback from %s", stored_msg->origin); + + } else if (stored_msg->data == NULL) { + crm_trace("FSA processing input from %s", stored_msg->origin); + + } else { + ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, + __func__); + + crm_trace("FSA processing XML message from %s", stored_msg->origin); + crm_log_xml_trace(ha_input->xml, "FSA message data"); + } +} + +static void +check_join_counts(fsa_data_t *msg_data) +{ + int count; + guint npeers; + + count = crmd_join_phase_count(crm_join_finalized); + if (count > 0) { + crm_err("%d cluster node%s failed to confirm join", + count, pcmk__plural_s(count)); + crmd_join_phase_log(LOG_NOTICE); + return; + } + + npeers = crm_active_peers(); + count = crmd_join_phase_count(crm_join_confirmed); + if (count == npeers) { + if (npeers == 1) { + crm_debug("Sole active cluster node is fully joined"); + } else { + crm_debug("All %d active cluster nodes are fully joined", count); + } + + } else if (count > npeers) { + crm_err("New election needed because more nodes confirmed join " + "than are in membership (%d > %u)", count, npeers); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + + } else if (controld_globals.membership_id != crm_peer_seq) { + 
crm_info("New join needed because membership changed (%llu -> %llu)", + controld_globals.membership_id, crm_peer_seq); + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + + } else { + crm_warn("Only %d of %u active cluster nodes fully joined " + "(%d did not respond to offer)", + count, npeers, crmd_join_phase_count(crm_join_welcomed)); + } +} + +static void +do_state_transition(enum crmd_fsa_state cur_state, + enum crmd_fsa_state next_state, fsa_data_t *msg_data) +{ + int level = LOG_INFO; + int count = 0; + gboolean clear_recovery_bit = TRUE; +#if 0 + uint64_t original_fsa_actions = controld_globals.fsa_actions; +#endif + + enum crmd_fsa_cause cause = msg_data->fsa_cause; + enum crmd_fsa_input current_input = msg_data->fsa_input; + + const char *state_from = fsa_state2string(cur_state); + const char *state_to = fsa_state2string(next_state); + const char *input = fsa_input2string(current_input); + + CRM_LOG_ASSERT(cur_state != next_state); + + do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", + state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); + + if (cur_state == S_IDLE || next_state == S_IDLE) { + level = LOG_NOTICE; + } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { + level = LOG_NOTICE; + } else if (cur_state == S_ELECTION) { + level = LOG_NOTICE; + } else if (cur_state == S_STARTING) { + level = LOG_NOTICE; + } else if (next_state == S_RECOVERY) { + level = LOG_WARNING; + } + + do_crm_log(level, "State transition %s -> %s " + CRM_XS " input=%s cause=%s origin=%s", + state_from, state_to, input, fsa_cause2string(cause), + msg_data->origin); + + if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { + controld_stop_current_election_timeout(); + } +#if 0 + if ((controld_globals.fsa_input_register & R_SHUTDOWN)) { + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + } +#endif + if (next_state == S_INTEGRATION) { + controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START); + } else { + controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP); + } + + if (next_state == S_FINALIZE_JOIN) { + controld_set_fsa_action_flags(A_FINALIZE_TIMER_START); + } else { + controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP); + } + + if (next_state != S_PENDING) { + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + } + if (next_state != S_IDLE) { + controld_stop_recheck_timer(); + } + + if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { + populate_cib_nodes(node_update_quick|node_update_all, __func__); + } + + switch (next_state) { + case S_PENDING: + { + cib_t *cib_conn = controld_globals.cib_conn; + cib_conn->cmds->set_secondary(cib_conn, cib_scope_local); + } + update_dc(NULL); + break; + + case S_ELECTION: + update_dc(NULL); + break; + + case S_NOT_DC: + controld_reset_counter_election_timer(); + purge_stonith_cleanup(); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); + controld_set_fsa_action_flags(A_SHUTDOWN_REQ); + } + CRM_LOG_ASSERT(controld_globals.dc_name != NULL); + if (controld_globals.dc_name == NULL) { + crm_err("Reached S_NOT_DC without a DC" " being recorded"); + } + break; + + case S_RECOVERY: + clear_recovery_bit = FALSE; + break; + + case S_FINALIZE_JOIN: + CRM_LOG_ASSERT(AM_I_DC); + if (cause == C_TIMER_POPPED) { + crm_warn("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } + count = crmd_join_phase_count(crm_join_welcomed); + if (count > 0) { + crm_warn("%d 
cluster node%s failed to respond to join offer", + count, pcmk__plural_s(count)); + crmd_join_phase_log(LOG_NOTICE); + + } else { + crm_debug("All cluster nodes (%d) responded to join offer", + crmd_join_phase_count(crm_join_integrated)); + } + break; + + case S_POLICY_ENGINE: + controld_reset_counter_election_timer(); + CRM_LOG_ASSERT(AM_I_DC); + if (cause == C_TIMER_POPPED) { + crm_info("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } + check_join_counts(msg_data); + break; + + case S_STOPPING: + case S_TERMINATE: + /* possibly redundant */ + controld_set_fsa_input_flags(R_SHUTDOWN); + break; + + case S_IDLE: + CRM_LOG_ASSERT(AM_I_DC); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("(Re)Issuing shutdown request now" " that we are the DC"); + controld_set_fsa_action_flags(A_SHUTDOWN_REQ); + } + controld_start_recheck_timer(); + break; + + default: + break; + } + + if (clear_recovery_bit && next_state != S_PENDING) { + controld_clear_fsa_action_flags(A_RECOVER); + } else if (clear_recovery_bit == FALSE) { + controld_set_fsa_action_flags(A_RECOVER); + } + +#if 0 + if (original_fsa_actions != controld_globals.fsa_actions) { + fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions, + "New actions"); + } +#endif +} diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h new file mode 100644 index 0000000..2b79f07 --- /dev/null +++ b/daemons/controld/controld_fsa.h @@ -0,0 +1,694 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CRMD_FSA__H +# define CRMD_FSA__H + +# include <crm/crm.h> +# include <crm/cib.h> +# include <crm/common/xml.h> +# include <crm/common/mainloop.h> +# include <crm/cluster.h> +# include <crm/cluster/election_internal.h> +# include <crm/common/ipc_internal.h> + +/*! 
States the controller can be in */
+enum crmd_fsa_state {
+    S_IDLE = 0,                 /* Nothing happening */
+
+    S_ELECTION,                 /* Take part in the election algorithm as
+                                 * described below
+                                 */
+    S_INTEGRATION,              /* integrate the status of new nodes (which is
+                                 * all of them if we have just been elected DC)
+                                 * to form a complete and up-to-date picture of
+                                 * the CIB
+                                 */
+    S_FINALIZE_JOIN,            /* integrate the status of new nodes (which is
+                                 * all of them if we have just been elected DC)
+                                 * to form a complete and up-to-date picture of
+                                 * the CIB
+                                 */
+    S_NOT_DC,                   /* we are in non-DC mode */
+    S_POLICY_ENGINE,            /* Determine next stable state of the cluster */
+    S_RECOVERY,                 /* Something bad happened, check everything is ok
+                                 * before continuing and attempt to recover if
+                                 * required
+                                 */
+    S_RELEASE_DC,               /* we were the DC, but now we aren't anymore,
+                                 * possibly by our own request, and we should
+                                 * release all unnecessary sub-systems, finish
+                                 * any pending actions, do general cleanup and
+                                 * unset anything that makes us think we are
+                                 * special :)
+                                 */
+    S_STARTING,                 /* we are just starting out */
+    S_PENDING,                  /* we are not a full/active member yet */
+    S_STOPPING,                 /* We are in the final stages of shutting down */
+    S_TERMINATE,                /* We are going to shutdown, this is the
+                                 * equivalent of "Sending TERM signal to all
+                                 * processes" in Linux and in worst case
+                                 * scenarios could be considered a self STONITH
+                                 */
+    S_TRANSITION_ENGINE,        /* Attempt to make the calculated next stable
+                                 * state of the cluster a reality
+                                 */
+
+    S_HALT,                     /* Freeze - don't do anything
+                                 * Something bad happened that needs the admin to fix
+                                 * Wait for I_ELECTION
+                                 */
+
+    /* ----------- Last state found in table is above ---------- */
+    S_ILLEGAL                   /* This is an illegal FSA state */
+                                /* (must be last) */
+};
+
+# define MAXSTATE  S_ILLEGAL
+
+/*
+   Once we start and do some basic sanity checks, we go into the
+   S_NOT_DC state and await instructions from the DC or input from
+   the cluster layer which indicates the election algorithm needs to run.
+
+   If the election algorithm is triggered, we enter the S_ELECTION state
+   from where we can either go back to the S_NOT_DC state or progress
+   to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
+   but aren't anymore).  See the libcrmcluster API documentation for more
+   information about the election algorithm.
+
+   Once the election is complete, if we are the DC, we enter the
+   S_INTEGRATION state which is a DC-in-waiting style state.  We are
+   the DC, but we shouldn't do anything yet because we may not have an
+   up-to-date picture of the cluster.  There may of course be times
+   when this fails, so we should go back to the S_RECOVERY stage and
+   check everything is ok.  We may also end up here if a new node came
+   online, since each node is authoritative about itself, and we would want
+   to incorporate its information into the CIB.
+
+   Once we have the latest CIB, we then enter the S_POLICY_ENGINE state
+   where we invoke the scheduler.  It is possible that, between
+   invoking the scheduler and receiving an answer, we receive
+   more input.  In this case, we would discard the original result and
+   invoke the scheduler again.
+
+   Once we are satisfied with the output from the scheduler, we
+   enter S_TRANSITION_ENGINE and feed the scheduler's output to the
+   Transition Engine, which attempts to make the scheduler's
+   calculation a reality.  If the transition completes successfully,
+   we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
+   current unstable state and try again.
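+
+   (Editor's illustration, not part of the upstream comment: assuming a
+   two-node cluster in which this node wins the election, one common
+   happy path through these states would be
+
+       S_STARTING -> S_PENDING -> S_ELECTION -> S_INTEGRATION
+           -> S_FINALIZE_JOIN -> S_POLICY_ENGINE -> S_TRANSITION_ENGINE
+           -> S_IDLE
+
+   while the node that loses the election settles in S_NOT_DC.)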
+
+   Of course, we may be asked to shutdown at any time; however, we must
+   progress to S_NOT_DC before doing so.  Once we have handed over DC
+   duties to another node, we can then shut down like everyone else,
+   that is, by asking the DC for permission and waiting for it to take all
+   our resources away.
+
+   The case where we are the DC and the only node in the cluster is a
+   special case and is handled as an escalation which takes us to
+   S_SHUTDOWN.  Similarly, if any other point in the shutdown
+   fails or stalls, this is escalated and we end up in S_TERMINATE.
+
+   At any point, the controller can relay messages for its subsystems,
+   but outbound messages (from subsystems) should probably be blocked
+   until S_INTEGRATION (for the DC) or the join protocol has
+   completed (for non-DC controllers).
+*/
+
+/*======================================
+ *
+ * Inputs/Events/Stimuli to be given to the finite state machine
+ *
+ * Some of these are true events, and others are synthesised based on
+ * the "register" (see below) and the contents or source of messages.
+ *
+ * The machine keeps processing until receiving I_NULL
+ *
+ *======================================*/
+enum crmd_fsa_input {
+/* 0 */
+    I_NULL,                     /* Nothing happened */
+/* 1 */
+
+    I_CIB_OP,                   /* An update to the CIB occurred */
+    I_CIB_UPDATE,               /* An update to the CIB occurred */
+    I_DC_TIMEOUT,               /* We have lost communication with the DC */
+    I_ELECTION,                 /* Someone started an election */
+    I_PE_CALC,                  /* The scheduler needs to be invoked */
+    I_RELEASE_DC,               /* The election completed and we were not
+                                 * elected, but we were the DC beforehand
+                                 */
+    I_ELECTION_DC,              /* The election completed and we were (re-)elected
+                                 * DC
+                                 */
+    I_ERROR,                    /* Something bad happened (more serious than
+                                 * I_FAIL) and may not have been due to the action
+                                 * being performed.  For example, we may have lost
+                                 * our connection to the CIB.
+                                 */
+/* 9 */
+    I_FAIL,                     /* The action failed to complete successfully */
+    I_INTEGRATED,
+    I_FINALIZED,
+    I_NODE_JOIN,                /* A node has entered the cluster */
+    I_NOT_DC,                   /* We are not and were not the DC before or after
+                                 * the current operation or state
+                                 */
+    I_RECOVERED,                /* The recovery process completed successfully */
+    I_RELEASE_FAIL,             /* We could not give up DC status for some reason
+                                 */
+    I_RELEASE_SUCCESS,          /* We are no longer the DC */
+    I_RESTART,                  /* The current set of actions needs to be
+                                 * restarted
+                                 */
+    I_TE_SUCCESS,               /* Some non-resource, non-cluster-layer action
+                                 * is required of us, e.g. ping
+                                 */
+/* 20 */
+    I_ROUTER,                   /* Do our job as router and forward this to the
+                                 * right place
+                                 */
+    I_SHUTDOWN,                 /* We are asking to shutdown */
+    I_STOP,                     /* We have been told to shutdown */
+    I_TERMINATE,                /* Actually exit */
+    I_STARTUP,
+    I_PE_SUCCESS,               /* The action completed successfully */
+
+    I_JOIN_OFFER,               /* The DC is offering membership */
+    I_JOIN_REQUEST,             /* The client is requesting membership */
+    I_JOIN_RESULT,              /* If not the DC: The result of a join request
+                                 * Else: A client is responding with its local state info
+                                 */
+
+    I_WAIT_FOR_EVENT,           /* we may be waiting for an async task to "happen"
+                                 * and until it does, we can't do anything else
+                                 */
+
+    I_DC_HEARTBEAT,             /* The DC is telling us that it is alive and well */
+
+    I_LRM_EVENT,
+
+/* 30 */
+    I_PENDING,
+    I_HALT,
+
+    /* ------------ Last input found in table is above ----------- */
+    I_ILLEGAL                   /* This is an illegal value for an FSA input */
+                                /* (must be last) */
+};
+
+# define MAXINPUT  I_ILLEGAL
+
+# define I_MESSAGE  I_ROUTER
+
+/*======================================
+ *
+ * actions
+ *
+ * Some of the actions below will always occur together for now, but this may
+ * not always be the case, so they are split up so that they can easily be
+ * called independently in the future, if necessary.
+ *
+ * For example, separating A_LRM_CONNECT from A_STARTUP might be useful
+ * if we ever try to recover from a faulty or disconnected executor.
+ *
+ *======================================*/
+
+        /* Don't do anything */
+# define A_NOTHING                  0x0000000000000000ULL
+
+/* -- Startup actions -- */
+        /* Hook to perform any actions (other than connecting to other daemons)
+         * that might be needed as part of the startup.
+         */
+# define A_STARTUP                  0x0000000000000001ULL
+        /* Hook to perform any actions that might be needed after startup
+         * has completed successfully.
+         */
+# define A_STARTED                  0x0000000000000002ULL
+        /* Connect to cluster layer */
+# define A_HA_CONNECT               0x0000000000000004ULL
+# define A_HA_DISCONNECT            0x0000000000000008ULL
+
+# define A_INTEGRATE_TIMER_START    0x0000000000000010ULL
+# define A_INTEGRATE_TIMER_STOP     0x0000000000000020ULL
+# define A_FINALIZE_TIMER_START     0x0000000000000040ULL
+# define A_FINALIZE_TIMER_STOP      0x0000000000000080ULL
+
+/* -- Election actions -- */
+# define A_DC_TIMER_START           0x0000000000000100ULL
+# define A_DC_TIMER_STOP            0x0000000000000200ULL
+# define A_ELECTION_COUNT           0x0000000000000400ULL
+# define A_ELECTION_VOTE            0x0000000000000800ULL
+
+# define A_ELECTION_START           0x0000000000001000ULL
+
+/* -- Message processing -- */
+        /* Process the queue of requests */
+# define A_MSG_PROCESS              0x0000000000002000ULL
+        /* Send the message to the correct recipient */
+# define A_MSG_ROUTE                0x0000000000004000ULL
+
+        /* Send a welcome message to new node(s) */
+# define A_DC_JOIN_OFFER_ONE        0x0000000000008000ULL
+
+/* -- Server Join protocol actions -- */
+        /* Send a welcome message to all nodes */
+# define A_DC_JOIN_OFFER_ALL        0x0000000000010000ULL
+        /* Process the remote node's ack of our join message */
+# define A_DC_JOIN_PROCESS_REQ      0x0000000000020000ULL
+        /* Send out the results of the Join phase */
+# define A_DC_JOIN_FINALIZE         0x0000000000040000ULL
+        /* Process a node's ack of the join results */
+# define A_DC_JOIN_PROCESS_ACK      0x0000000000080000ULL
+
+/* -- Client Join protocol actions -- */
+# define A_CL_JOIN_QUERY            0x0000000000100000ULL
+# define A_CL_JOIN_ANNOUNCE         0x0000000000200000ULL
+        /* Request membership to the DC list */
+# define A_CL_JOIN_REQUEST          0x0000000000400000ULL
+        /* Did the DC accept or reject the request? */
+# define A_CL_JOIN_RESULT           0x0000000000800000ULL
+
+/* -- Recovery, DC start/stop -- */
+        /* Something bad happened, try to recover */
+# define A_RECOVER                  0x0000000001000000ULL
+        /* Hook to perform any actions (apart from starting the TE and
+         * scheduler, and gathering the latest CIB) that might be necessary
+         * before giving up the responsibilities of being the DC.
+         */
+# define A_DC_RELEASE               0x0000000002000000ULL
+        /* */
+# define A_DC_RELEASED              0x0000000004000000ULL
+        /* Hook to perform any actions (apart from starting the TE and
+         * scheduler, and gathering the latest CIB) that might be necessary
+         * before taking over the responsibilities of being the DC.
+         */
+# define A_DC_TAKEOVER              0x0000000008000000ULL
+
+/* -- Shutdown actions -- */
+# define A_SHUTDOWN                 0x0000000010000000ULL
+# define A_STOP                     0x0000000020000000ULL
+# define A_EXIT_0                   0x0000000040000000ULL
+# define A_EXIT_1                   0x0000000080000000ULL
+
+# define A_SHUTDOWN_REQ             0x0000000100000000ULL
+# define A_ELECTION_CHECK           0x0000000200000000ULL
+# define A_DC_JOIN_FINAL            0x0000000400000000ULL
+
+/* -- CIB actions -- */
+# define A_CIB_START                0x0000020000000000ULL
+# define A_CIB_STOP                 0x0000040000000000ULL
+
+/* -- Transition Engine actions -- */
+        /* Attempt to reach the newly calculated cluster state.  This is
+         * only called once per transition (except if it is asked to
+         * stop the transition or start a new one).
+         * Once given a cluster state to reach, the TE will determine
+         * tasks that can be performed in parallel, execute them, wait
+         * for replies and then determine the next set until the new
+         * state is reached or no further tasks can be taken.
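+         *
+         * (Editor's note, not upstream text: like every A_* value in this
+         * header, these are single bits in the 64-bit fsa_actions word in
+         * controld_globals, so the dispatcher in s_crmd_fsa_actions() can
+         * test and clear each one independently, e.g.
+         *
+         *     if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) {
+         *         do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
+         *     }
+         * )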
+         */
+# define A_TE_INVOKE                0x0000100000000000ULL
+# define A_TE_START                 0x0000200000000000ULL
+# define A_TE_STOP                  0x0000400000000000ULL
+# define A_TE_CANCEL                0x0000800000000000ULL
+# define A_TE_HALT                  0x0001000000000000ULL
+
+/* -- Scheduler actions -- */
+        /* Calculate the next state for the cluster.  This is only
+         * invoked once per needed calculation.
+         */
+# define A_PE_INVOKE                0x0002000000000000ULL
+# define A_PE_START                 0x0004000000000000ULL
+# define A_PE_STOP                  0x0008000000000000ULL
+/* -- Misc actions -- */
+        /* Add a system-generated "block" so that resources aren't moved
+         * to, or are actively moved away from, the affected node.  This
+         * way we can return quickly even if busy with other things.
+         */
+# define A_NODE_BLOCK               0x0010000000000000ULL
+        /* Update our information in the local CIB */
+# define A_UPDATE_NODESTATUS        0x0020000000000000ULL
+# define A_READCONFIG               0x0080000000000000ULL
+
+/* -- LRM Actions -- */
+        /* Connect to pacemaker-execd */
+# define A_LRM_CONNECT              0x0100000000000000ULL
+        /* Disconnect from pacemaker-execd */
+# define A_LRM_DISCONNECT           0x0200000000000000ULL
+# define A_LRM_INVOKE               0x0400000000000000ULL
+# define A_LRM_EVENT                0x0800000000000000ULL
+
+/* -- Logging actions -- */
+# define A_LOG                      0x1000000000000000ULL
+# define A_ERROR                    0x2000000000000000ULL
+# define A_WARN                     0x4000000000000000ULL
+
+# define O_EXIT             (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
+# define O_RELEASE          (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
+# define O_PE_RESTART       (A_PE_START|A_PE_STOP)
+# define O_TE_RESTART       (A_TE_START|A_TE_STOP)
+# define O_CIB_RESTART      (A_CIB_START|A_CIB_STOP)
+# define O_LRM_RECONNECT    (A_LRM_CONNECT|A_LRM_DISCONNECT)
+# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START)
+/*======================================
+ *
+ * "register" contents
+ *
+ * Things we may want to remember regardless of which state we are in.
+ *
+ * These also count as inputs for synthesizing I_*
+ *
+ *======================================*/
+# define R_THE_DC           0x00000001ULL
+                                        /* Are we the DC? */
+# define R_STARTING         0x00000002ULL
+                                        /* Are we starting up? */
+# define R_SHUTDOWN         0x00000004ULL
+                                        /* Are we trying to shut down? */
+# define R_STAYDOWN         0x00000008ULL
+                                        /* Should we restart? */
+
+# define R_JOIN_OK          0x00000010ULL   /* Have we completed the join process */
+# define R_READ_CONFIG      0x00000040ULL
+# define R_INVOKE_PE        0x00000080ULL   // Should the scheduler be invoked?
+
+# define R_CIB_CONNECTED    0x00000100ULL
+                                        /* Is the CIB connected? */
+# define R_PE_CONNECTED     0x00000200ULL   // Is the scheduler connected?
+# define R_TE_CONNECTED     0x00000400ULL
+                                        /* Is the Transition Engine connected? */
+# define R_LRM_CONNECTED    0x00000800ULL   // Is pacemaker-execd connected?
+
+# define R_CIB_REQUIRED     0x00001000ULL
+                                        /* Is the CIB required? */
+# define R_PE_REQUIRED      0x00002000ULL   // Is the scheduler required?
+# define R_TE_REQUIRED      0x00004000ULL
+                                        /* Is the Transition Engine required? */
+# define R_ST_REQUIRED      0x00008000ULL
+                                        /* Is the Stonith daemon required? */
+
+# define R_CIB_DONE         0x00010000ULL
+                                        /* Have we calculated the CIB? */
+# define R_HAVE_CIB         0x00020000ULL   /* Do we have an up-to-date CIB */
+
+# define R_MEMBERSHIP       0x00100000ULL   /* Have we got cluster layer data yet */
+# define R_PEER_DATA        0x00200000ULL   /* Have we got T_CL_STATUS data yet */
+
+# define R_HA_DISCONNECTED  0x00400000ULL   /* did we sign out of our own accord */
+
+# define R_REQ_PEND         0x01000000ULL
+                                        /* Are there Requests waiting for
+                                           processing? 
*/ +# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler? +# define R_TE_PEND 0x04000000ULL + /* Has the TE been invoked and we're + awaiting completion? */ +# define R_RESP_PEND 0x08000000ULL + /* Do we have clients waiting on a + response? if so perhaps we shouldn't + stop yet */ + +# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all + * resources in preparation for + * shutting down */ + +# define R_IN_RECOVERY 0x80000000ULL + +#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID) + +enum crmd_fsa_cause { + C_UNKNOWN = 0, + C_STARTUP, + C_IPC_MESSAGE, + C_HA_MESSAGE, + C_CRMD_STATUS_CALLBACK, + C_LRM_OP_CALLBACK, + C_TIMER_POPPED, + C_SHUTDOWN, + C_FSA_INTERNAL, +}; + +enum fsa_data_type { + fsa_dt_none, + fsa_dt_ha_msg, + fsa_dt_xml, + fsa_dt_lrm, +}; + +typedef struct fsa_data_s fsa_data_t; +struct fsa_data_s { + int id; + enum crmd_fsa_input fsa_input; + enum crmd_fsa_cause fsa_cause; + uint64_t actions; + const char *origin; + void *data; + enum fsa_data_type data_type; +}; + +#define controld_set_fsa_input_flags(flags_to_set) do { \ + controld_globals.fsa_input_register \ + = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ + "FSA input", "controller", \ + controld_globals.fsa_input_register, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_fsa_input_flags(flags_to_clear) do { \ + controld_globals.fsa_input_register \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ + "FSA input", "controller", \ + controld_globals.fsa_input_register, \ + (flags_to_clear), \ + #flags_to_clear); \ + } while (0) + +#define controld_set_fsa_action_flags(flags_to_set) do { \ + controld_globals.fsa_actions \ + = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \ + "FSA action", "controller", \ + controld_globals.fsa_actions, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_fsa_action_flags(flags_to_clear) do { \ + controld_globals.fsa_actions \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \ + "FSA action", "controller", \ + controld_globals.fsa_actions, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +// This should be moved elsewhere +xmlNode *controld_query_executor_state(void); + +const char *fsa_input2string(enum crmd_fsa_input input); +const char *fsa_state2string(enum crmd_fsa_state state); +const char *fsa_cause2string(enum crmd_fsa_cause cause); +const char *fsa_action2string(long long action); + +enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause); + +enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input); + +uint64_t controld_fsa_get_action(enum crmd_fsa_input input); + +void controld_init_fsa_trigger(void); +void controld_destroy_fsa_trigger(void); + +void free_max_generation(void); + +# define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC) +# define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__) + +void controld_trigger_fsa_as(const char *fn, int line); + +/* A_READCONFIG */ +void do_read_config(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_PE_INVOKE */ +void do_pe_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_LOG */ +void do_log(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* 
A_STARTUP */ +void do_startup(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_CIB_START, STOP, RESTART */ +void do_cib_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_HA_CONNECT */ +void do_ha_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_LRM_CONNECT */ +void do_lrm_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_PE_START, STOP, RESTART */ +void do_pe_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_TE_START, STOP, RESTART */ +void do_te_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_STARTED */ +void do_started(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_MSG_ROUTE */ +void do_msg_route(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_RECOVER */ +void do_recover(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_ELECTION_VOTE */ +void do_election_vote(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_ELECTION_COUNT */ +void do_election_count_vote(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, + fsa_data_t *msg_data); + +/* A_ELECTION_CHECK */ +void do_election_check(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_TIMER_STOP */ +void do_timer_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_TAKEOVER */ +void do_dc_takeover(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_RELEASE */ +void do_dc_release(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_OFFER_ALL */ +void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_OFFER_ONE */ +void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_ACK */ +void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_REQ */ +void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, + fsa_data_t *msg_data); + +/* A_DC_JOIN_FINALIZE */ +void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, + enum 
crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_QUERY */ +/* is there a DC out there? */ +void do_cl_join_query(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_ANNOUNCE */ +void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_REQUEST */ +void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, + fsa_data_t *msg_data); + +/* A_CL_JOIN_RESULT */ +void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, + fsa_data_t *msg_data); + +/* A_LRM_INVOKE */ +void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_LRM_EVENT */ +void do_lrm_event(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_TE_INVOKE, A_TE_CANCEL */ +void do_te_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_SHUTDOWN_REQ */ +void do_shutdown_req(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_SHUTDOWN */ +void do_shutdown(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_STOP */ +void do_stop(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_EXIT_0, A_EXIT_1 */ +void do_exit(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_FINAL */ +void do_dc_join_final(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); +#endif diff --git a/daemons/controld/controld_globals.h b/daemons/controld/controld_globals.h new file mode 100644 index 0000000..eff1607 --- /dev/null +++ b/daemons/controld/controld_globals.h @@ -0,0 +1,143 @@ +/* + * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_GLOBALS__H +# define CONTROLD_GLOBALS__H + +#include <crm_internal.h> // pcmk__output_t, etc. + +#include <stdint.h> // uint32_t, uint64_t +#include <glib.h> // GList, GMainLoop +#include <crm/cib.h> // cib_t +#include <pacemaker-internal.h> // pcmk__graph_t +#include <controld_fsa.h> // enum crmd_fsa_state + +typedef struct { + // Booleans + + //! Group of \p controld_flags values + uint32_t flags; + + + // Controller FSA + + //! FSA state + enum crmd_fsa_state fsa_state; + + //! FSA actions (group of \p A_* flags) + uint64_t fsa_actions; + + //! FSA input register contents (group of \p R_* flags) + uint64_t fsa_input_register; + + //! FSA message queue + GList *fsa_message_queue; + + + // CIB + + //! 
Connection to the CIB + cib_t *cib_conn; + + //! CIB connection's client ID + const char *cib_client_id; + + + // Scheduler + + //! Reference of the scheduler request being waited on + char *fsa_pe_ref; + + + // Transitioner + + //! Transitioner UUID + char *te_uuid; + + //! Graph of transition currently being processed + pcmk__graph_t *transition_graph; + + + // Logging + + //! Output object for controller log messages + pcmk__output_t *logger_out; + + + // Other + + //! Cluster name + char *cluster_name; + + //! Designated controller name + char *dc_name; + + //! Designated controller's Pacemaker version + char *dc_version; + + //! Local node's node name + char *our_nodename; + + //! Local node's UUID + char *our_uuid; + + //! Last saved cluster communication layer membership ID + unsigned long long membership_id; + + //! Max lifetime (in seconds) of a resource's shutdown lock to a node + guint shutdown_lock_limit; + + //! Main event loop + GMainLoop *mainloop; +} controld_globals_t; + +extern controld_globals_t controld_globals; + +/*! + * \internal + * \enum controld_flags + * \brief Bit flags to store various controller state and configuration info + */ +enum controld_flags { + //! The DC left in a membership change that is being processed + controld_dc_left = (1 << 0), + + //! The FSA is stalled waiting for further input + controld_fsa_is_stalled = (1 << 1), + + //! The local node has been in a quorate partition at some point + controld_ever_had_quorum = (1 << 2), + + //! The local node is currently in a quorate partition + controld_has_quorum = (1 << 3), + + //! Panic the local node if it loses quorum + controld_no_quorum_suicide = (1 << 4), + + //! Lock resources to the local node when it shuts down cleanly + controld_shutdown_lock_enabled = (1 << 5), +}; + +# define controld_set_global_flags(flags_to_set) do { \ + controld_globals.flags = pcmk__set_flags_as(__func__, __LINE__, \ + LOG_TRACE, \ + "Global", "controller", \ + controld_globals.flags, \ + (flags_to_set), \ + #flags_to_set); \ + } while (0) + +# define controld_clear_global_flags(flags_to_clear) do { \ + controld_globals.flags \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Global", \ + "controller", controld_globals.flags, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +#endif // ifndef CONTROLD_GLOBALS__H diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c new file mode 100644 index 0000000..da6a9d6 --- /dev/null +++ b/daemons/controld/controld_join_client.c @@ -0,0 +1,366 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); + +extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); + +/*! + * \internal + * \brief Remember if DC is shutting down as we join + * + * If we're joining while the current DC is shutting down, update its expected + * state, so we don't fence it if we become the new DC. (We weren't a peer + * when it broadcast its shutdown request.) 
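+ *
+ * (Editor's note, not upstream text: the "expected state" referred to
+ * here is the CRMD_JOINSTATE_* value kept in the controller's peer
+ * cache; recording CRMD_JOINSTATE_DOWN for the old DC marks its
+ * upcoming exit as expected, which is what later fencing decisions
+ * consult.)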
+ * + * \param[in] msg A join message from the DC + */ +static void +update_dc_expected(const xmlNode *msg) +{ + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { + crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +} + +/* A_CL_JOIN_QUERY */ +/* is there a DC out there? */ +void +do_cl_join_query(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); + + sleep(1); // Give the cluster layer time to propagate to the DC + update_dc(NULL); /* Unset any existing value so that the result is not discarded */ + crm_debug("Querying for a DC"); + send_cluster_message(NULL, crm_msg_crmd, req, FALSE); + free_xml(req); +} + +/* A_CL_JOIN_ANNOUNCE */ + +/* this is kind of a workaround for the fact that we may not be around or + * are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL + */ +void +do_cl_join_announce(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + /* don't announce if we're in one of these states */ + if (cur_state != S_PENDING) { + crm_warn("Not announcing cluster join because in state %s", + fsa_state2string(cur_state)); + return; + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { + /* send as a broadcast */ + xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); + + crm_debug("Announcing availability"); + update_dc(NULL); + send_cluster_message(NULL, crm_msg_crmd, req, FALSE); + free_xml(req); + + } else { + /* Delay announce until we have finished local startup */ + crm_warn("Delaying announce of cluster join until local startup is complete"); + return; + } +} + +static int query_call_id = 0; + +/* A_CL_JOIN_REQUEST */ +/* aka. accept the welcome offer */ +void +do_cl_join_offer_respond(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *welcome_from; + const char *join_id; + + CRM_CHECK(input != NULL, return); + +#if 0 + if (we are sick) { + log error; + + /* save the request for later? 
*/ + return; + } +#endif + + welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); + join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); + crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", + welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); + + /* we only ever want the last one */ + if (query_call_id > 0) { + crm_trace("Cancelling previous join query: %d", query_call_id); + remove_cib_op_callback(query_call_id, FALSE); + query_call_id = 0; + } + + if (update_dc(input->msg) == FALSE) { + crm_warn("Discarding cluster join offer from node %s (expected %s)", + welcome_from, controld_globals.dc_name); + return; + } + + update_dc_expected(input->msg); + + query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL, + cib_scope_local|cib_no_children); + fsa_register_cib_callback(query_call_id, strdup(join_id), + join_query_callback); + crm_trace("Registered join query callback: %d", query_call_id); + + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + controld_trigger_fsa(); +} + +void +join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + char *join_id = user_data; + xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); + + CRM_LOG_ASSERT(join_id != NULL); + + if (query_call_id != call_id) { + crm_trace("Query %d superseded", call_id); + goto done; + } + + query_call_id = 0; + if(rc != pcmk_ok || output == NULL) { + crm_err("Could not retrieve version details for join-%s: %s (%d)", + join_id, pcmk_strerror(rc), rc); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + + } else if (controld_globals.dc_name == NULL) { + crm_debug("Membership is in flux, not continuing join-%s", join_id); + + } else { + xmlNode *reply = NULL; + + crm_debug("Respond to join offer join-%s from %s", + join_id, controld_globals.dc_name); + copy_in_properties(generation, output); + + reply = create_request(CRM_OP_JOIN_REQUEST, generation, + controld_globals.dc_name, CRM_SYSTEM_DC, + CRM_SYSTEM_CRMD, NULL); + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + send_cluster_message(crm_get_peer(0, controld_globals.dc_name), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } + + done: + free_xml(generation); +} + +static void +set_join_state(const char * start_state) +{ + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " + "environment", controld_globals.our_nodename, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, controld_globals.our_uuid, + NULL, NULL, NULL, "standby", "on", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " + "environment", controld_globals.our_nodename, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, controld_globals.our_uuid, + NULL, NULL, NULL, "standby", "off", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { + crm_debug("Not forcing a starting state on node %s", + controld_globals.our_nodename); + + } else { + crm_warn("Unrecognized start state '%s', using 'default' (%s)", + start_state, controld_globals.our_nodename); + } +} + +static int +update_conn_host_cache(xmlNode *node, void *userdata) +{ + const char *remote = 
crm_element_value(node, XML_ATTR_ID); + const char *conn_host = crm_element_value(node, PCMK__XA_CONN_HOST); + const char *state = crm_element_value(node, XML_CIB_TAG_STATE); + + crm_node_t *remote_peer = crm_remote_peer_get(remote); + + if (remote_peer == NULL) { + return pcmk_rc_ok; + } + + if (conn_host != NULL) { + pcmk__str_update(&remote_peer->conn_host, conn_host); + } + + if (state != NULL) { + pcmk__update_peer_state(__func__, remote_peer, state, 0); + } + + return pcmk_rc_ok; +} + +/* A_CL_JOIN_RESULT */ +/* aka. this is notification that we have (or have not) been accepted */ +void +do_cl_join_finalize_respond(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *tmp1 = NULL; + gboolean was_nack = TRUE; + static gboolean first_join = TRUE; + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); + + int join_id = -1; + const char *op = crm_element_value(input->msg, F_CRM_TASK); + const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); + + if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) { + crm_trace("Ignoring op=%s message", op); + return; + } + + /* calculate if it was an ack or a nack */ + if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) { + was_nack = FALSE; + } + + crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); + + if (was_nack) { + crm_err("Shutting down because cluster join with leader %s failed " + CRM_XS" join-%d NACK'd", welcome_from, join_id); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + controld_set_fsa_input_flags(R_STAYDOWN); + return; + } + + if (!AM_I_DC + && pcmk__str_eq(welcome_from, controld_globals.our_nodename, + pcmk__str_casei)) { + crm_warn("Discarding our own welcome - we're no longer the DC"); + return; + } + + if (update_dc(input->msg) == FALSE) { + crm_warn("Discarding %s from node %s (expected from %s)", + op, welcome_from, controld_globals.dc_name); + return; + } + + update_dc_expected(input->msg); + + /* record the node's feature set as a transient attribute */ + update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET, + CRM_FEATURE_SET, NULL, FALSE); + + /* send our status section to the DC */ + tmp1 = controld_query_executor_state(); + if (tmp1 != NULL) { + xmlNode *remotes = NULL; + xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, + controld_globals.dc_name, CRM_SYSTEM_DC, + CRM_SYSTEM_CRMD, NULL); + + crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); + + crm_debug("Confirming join-%d: sending local operation history to %s", + join_id, controld_globals.dc_name); + + /* + * If this is the node's first join since the controller started on it, + * set its initial state (standby or member) according to the user's + * preference. + * + * We do not clear the LRM history here. Even if the DC failed to do it + * when we last left, removing them here creates a race condition if the + * controller is being recovered. Instead of a list of active resources + * from the executor, we may end up with a blank status section. If we + * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous + * clones and end up with multiple active instances on the machine. 
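+         *
+         * (Editor's note, not upstream text: the "blank status section"
+         * case matters because the DC schedules probes for every resource
+         * it has no recorded status for; with anonymous clones, those
+         * probe results can be attributed to a different clone instance
+         * than the one actually active here, hence the duplicate-instance
+         * risk described above.)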
+ */ + if (first_join + && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + + first_join = FALSE; + if (start_state) { + set_join_state(start_state); + } + } + + send_cluster_message(crm_get_peer(0, controld_globals.dc_name), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + + if (AM_I_DC == FALSE) { + register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, + __func__); + } + + free_xml(tmp1); + + /* Update the remote node cache with information about which node + * is hosting the connection. + */ + remotes = pcmk__xe_match(input->msg, XML_CIB_TAG_NODES, NULL, NULL); + if (remotes != NULL) { + pcmk__xe_foreach_child(remotes, XML_CIB_TAG_NODE, update_conn_host_cache, NULL); + } + + } else { + crm_err("Could not confirm join-%d with %s: Local operation history " + "failed", join_id, controld_globals.dc_name); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + } +} diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c new file mode 100644 index 0000000..f82b132 --- /dev/null +++ b/daemons/controld/controld_join_dc.c @@ -0,0 +1,987 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/crm.h> + +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/cluster.h> + +#include <pacemaker-controld.h> + +static char *max_generation_from = NULL; +static xmlNodePtr max_generation_xml = NULL; + +/*! + * \internal + * \brief Nodes from which a CIB sync has failed since the peer joined + * + * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is + * the name of a client node from which a CIB \p sync_from() call has failed in + * \p do_dc_join_finalize() since the client joined the cluster as a peer. + * \p join_id is the ID of the join round in which the \p sync_from() failed, + * and is intended for use in nack log messages. + */ +static GHashTable *failed_sync_nodes = NULL; + +void finalize_join_for(gpointer key, gpointer value, gpointer user_data); +void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); +gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +/* Numeric counter used to identify join rounds (an unsigned int would be + * appropriate, except we get and set it in XML as int) + */ +static int current_join_id = 0; + +/*! + * \internal + * \brief Destroy the hash table containing failed sync nodes + */ +void +controld_destroy_failed_sync_table(void) +{ + if (failed_sync_nodes != NULL) { + g_hash_table_destroy(failed_sync_nodes); + failed_sync_nodes = NULL; + } +} + +/*! + * \internal + * \brief Remove a node from the failed sync nodes table if present + * + * \param[in] node_name Node name to remove + */ +void +controld_remove_failed_sync_node(const char *node_name) +{ + if (failed_sync_nodes != NULL) { + g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); + } +} + +/*! 
+ * \internal + * \brief Add to a hash table a node whose CIB failed to sync + * + * \param[in] node_name Name of node whose CIB failed to sync + * \param[in] join_id Join round when the failure occurred + */ +static void +record_failed_sync_node(const char *node_name, gint join_id) +{ + if (failed_sync_nodes == NULL) { + failed_sync_nodes = pcmk__strikey_table(g_free, NULL); + } + + /* If the node is already in the table then we failed to nack it during the + * filter offer step + */ + CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), + GINT_TO_POINTER(join_id))); +} + +/*! + * \internal + * \brief Look up a node name in the failed sync table + * + * \param[in] node_name Name of node to look up + * \param[out] join_id Where to store the join ID of when the sync failed + * + * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the + * node name was found, or \p pcmk_rc_node_unknown otherwise. + * \note \p *join_id is set to -1 if the node is not found. + */ +static int +lookup_failed_sync_node(const char *node_name, gint *join_id) +{ + *join_id = -1; + + if (failed_sync_nodes != NULL) { + gpointer result = g_hash_table_lookup(failed_sync_nodes, + (gchar *) node_name); + if (result != NULL) { + *join_id = GPOINTER_TO_INT(result); + return pcmk_rc_ok; + } + } + return pcmk_rc_node_unknown; +} + +void +crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) +{ + enum crm_join_phase last = 0; + + CRM_CHECK(node != NULL, return); + + /* Remote nodes do not participate in joins */ + if (pcmk_is_set(node->flags, crm_remote_node)) { + return; + } + + last = node->join; + + if(phase == last) { + crm_trace("Node %s join-%d phase is still %s " + CRM_XS " nodeid=%u source=%s", + node->uname, current_join_id, crm_join_phase_str(last), + node->id, source); + + } else if ((phase <= crm_join_none) || (phase == (last + 1))) { + node->join = phase; + crm_trace("Node %s join-%d phase is now %s (was %s) " + CRM_XS " nodeid=%u source=%s", + node->uname, current_join_id, crm_join_phase_str(phase), + crm_join_phase_str(last), node->id, source); + + } else { + crm_warn("Rejecting join-%d phase update for node %s because " + "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", + current_join_id, node->uname, crm_join_phase_str(last), + crm_join_phase_str(phase), node->id, source); + } +} + +static void +start_join_round(void) +{ + GHashTableIter iter; + crm_node_t *peer = NULL; + + crm_debug("Starting new join round join-%d", current_join_id); + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + crm_update_peer_join(__func__, peer, crm_join_none); + } + if (max_generation_from != NULL) { + free(max_generation_from); + max_generation_from = NULL; + } + if (max_generation_xml != NULL) { + free_xml(max_generation_xml); + max_generation_xml = NULL; + } + controld_clear_fsa_input_flags(R_HAVE_CIB); + controld_forget_all_cib_replace_calls(); +} + +/*! + * \internal + * \brief Create a join message from the DC + * + * \param[in] join_op Join operation name + * \param[in] host_to Recipient of message + */ +static xmlNode * +create_dc_message(const char *join_op, const char *host_to) +{ + xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, + CRM_SYSTEM_DC, NULL); + + /* Identify which election this is a part of */ + crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); + + /* Add a field specifying whether the DC is shutting down. 
This keeps the + * joining node from fencing the old DC if it becomes the new DC. + */ + pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING, + pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN)); + return msg; +} + +static void +join_make_offer(gpointer key, gpointer value, gpointer user_data) +{ + xmlNode *offer = NULL; + crm_node_t *member = (crm_node_t *)value; + + CRM_ASSERT(member != NULL); + if (crm_is_peer_active(member) == FALSE) { + crm_info("Not making join-%d offer to inactive node %s", + current_join_id, + (member->uname? member->uname : "with unknown name")); + if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) { + /* You would think this is unsafe, but in fact this plus an + * active resource is what causes it to be fenced. + * + * Yes, this does mean that any node that dies at the same + * time as the old DC and is not (still) running a resource + * won't be fenced. + * + * I'm not happy about this either. + */ + pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); + } + return; + } + + if (member->uname == NULL) { + crm_info("Not making join-%d offer to node uuid %s with unknown name", + current_join_id, member->uuid); + return; + } + + if (controld_globals.membership_id != crm_peer_seq) { + controld_globals.membership_id = crm_peer_seq; + crm_info("Making join-%d offers based on membership event %llu", + current_join_id, crm_peer_seq); + } + + if(user_data && member->join > crm_join_none) { + crm_info("Not making join-%d offer to already known node %s (%s)", + current_join_id, member->uname, + crm_join_phase_str(member->join)); + return; + } + + crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none); + + offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); + + // Advertise our feature set so the joining node can bail if not compatible + crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + + crm_info("Sending join-%d offer to %s", current_join_id, member->uname); + send_cluster_message(member, crm_msg_crmd, offer, TRUE); + free_xml(offer); + + crm_update_peer_join(__func__, member, crm_join_welcomed); +} + +/* A_DC_JOIN_OFFER_ALL */ +void +do_dc_join_offer_all(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + int count; + + /* Reset everyone's status back to down or in_ccm in the CIB. + * Any nodes that are active in the CIB but not in the cluster membership + * will be seen as offline by the scheduler anyway. 
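+ *
+ * Note that current_join_id is incremented before any offers go out, so
+ * replies that belong to an earlier round can be recognized and ignored
+ * by do_dc_join_filter_offer().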
+ */ + current_join_id++; + start_join_round(); + + update_dc(NULL); + if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { + crm_info("A new node joined the cluster"); + } + g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); + + count = crmd_join_phase_count(crm_join_welcomed); + crm_info("Waiting on join-%d requests from %d outstanding node%s", + current_join_id, count, pcmk__plural_s(count)); + + // Don't waste time by invoking the scheduler yet +} + +/* A_DC_JOIN_OFFER_ONE */ +void +do_dc_join_offer_one(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_node_t *member; + ha_msg_input_t *welcome = NULL; + int count; + const char *join_to = NULL; + + if (msg_data->data == NULL) { + crm_info("Making join-%d offers to any unconfirmed nodes " + "because an unknown node joined", current_join_id); + g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); + check_join_state(cur_state, __func__); + return; + } + + welcome = fsa_typed_data(fsa_dt_ha_msg); + if (welcome == NULL) { + // fsa_typed_data() already logged an error + return; + } + + join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); + if (join_to == NULL) { + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } + member = crm_get_peer(0, join_to); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in + * due course, or we can re-store the original offer on the client. + */ + + crm_update_peer_join(__func__, member, crm_join_none); + join_make_offer(NULL, member, NULL); + + /* If the offer isn't to the local node, make an offer to the local node as + * well, to ensure the correct value for max_generation_from. + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { + member = crm_get_peer(0, controld_globals.our_nodename); + join_make_offer(NULL, member, NULL); + } + + /* This was a genuine join request; cancel any existing transition and + * invoke the scheduler. 
+ */ + abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL); + + count = crmd_join_phase_count(crm_join_welcomed); + crm_info("Waiting on join-%d requests from %d outstanding node%s", + current_join_id, count, pcmk__plural_s(count)); + + // Don't waste time by invoking the scheduler yet +} + +static int +compare_int_fields(xmlNode * left, xmlNode * right, const char *field) +{ + const char *elem_l = crm_element_value(left, field); + const char *elem_r = crm_element_value(right, field); + + long long int_elem_l; + long long int_elem_r; + + pcmk__scan_ll(elem_l, &int_elem_l, -1LL); + pcmk__scan_ll(elem_r, &int_elem_r, -1LL); + + if (int_elem_l < int_elem_r) { + return -1; + + } else if (int_elem_l > int_elem_r) { + return 1; + } + + return 0; +} + +/* A_DC_JOIN_PROCESS_REQ */ +void +do_dc_join_filter_offer(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *generation = NULL; + + int cmp = 0; + int join_id = -1; + int count = 0; + gint value = 0; + gboolean ack_nack_bool = TRUE; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); + const char *join_version = crm_element_value(join_ack->msg, + XML_ATTR_CRM_VERSION); + crm_node_t *join_node = NULL; + + if (join_from == NULL) { + crm_err("Ignoring invalid join request without node name"); + return; + } + join_node = crm_get_peer(0, join_from); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { + crm_debug("Ignoring join-%d request from %s because we are on join-%d", + join_id, join_from, current_join_id); + check_join_state(cur_state, __func__); + return; + } + + generation = join_ack->xml; + if (max_generation_xml != NULL && generation != NULL) { + int lpc = 0; + + const char *attributes[] = { + XML_ATTR_GENERATION_ADMIN, + XML_ATTR_GENERATION, + XML_ATTR_NUMUPDATES, + }; + + for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { + cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); + } + } + + if (ref == NULL) { + ref = "none"; // for logging only + } + + if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { + crm_err("Rejecting join-%d request from node %s because we failed to " + "sync its CIB in join-%d " CRM_XS " ref=%s", + join_id, join_from, value, ref); + ack_nack_bool = FALSE; + + } else if (!crm_is_peer_active(join_node)) { + if (match_down_event(join_from) != NULL) { + /* The join request was received after the node was fenced or + * otherwise shutdown in a way that we're aware of. No need to log + * an error in this rare occurrence; we know the client was recently + * shut down, and receiving a lingering in-flight request is not + * cause for alarm. 
+ */ + crm_debug("Rejecting join-%d request from inactive node %s " + CRM_XS " ref=%s", join_id, join_from, ref); + } else { + crm_err("Rejecting join-%d request from inactive node %s " + CRM_XS " ref=%s", join_id, join_from, ref); + } + ack_nack_bool = FALSE; + + } else if (generation == NULL) { + crm_err("Rejecting invalid join-%d request from node %s " + "missing CIB generation " CRM_XS " ref=%s", + join_id, join_from, ref); + ack_nack_bool = FALSE; + + } else if ((join_version == NULL) + || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { + crm_err("Rejecting join-%d request from node %s because feature set %s" + " is incompatible with ours (%s) " CRM_XS " ref=%s", + join_id, join_from, (join_version? join_version : "pre-3.1.0"), + CRM_FEATURE_SET, ref); + ack_nack_bool = FALSE; + + } else if (max_generation_xml == NULL) { + const char *validation = crm_element_value(generation, + XML_ATTR_VALIDATION); + + if (get_schema_version(validation) < 0) { + crm_err("Rejecting join-%d request from %s (with first CIB " + "generation) due to unknown schema version %s " + CRM_XS " ref=%s", + join_id, join_from, validation, ref); + ack_nack_bool = FALSE; + + } else { + crm_debug("Accepting join-%d request from %s (with first CIB " + "generation) " CRM_XS " ref=%s", + join_id, join_from, ref); + max_generation_xml = copy_xml(generation); + pcmk__str_update(&max_generation_from, join_from); + } + + } else if ((cmp < 0) + || ((cmp == 0) + && pcmk__str_eq(join_from, controld_globals.our_nodename, + pcmk__str_casei))) { + const char *validation = crm_element_value(generation, + XML_ATTR_VALIDATION); + + if (get_schema_version(validation) < 0) { + crm_err("Rejecting join-%d request from %s (with better CIB " + "generation than current best from %s) due to unknown " + "schema version %s " CRM_XS " ref=%s", + join_id, join_from, max_generation_from, validation, ref); + ack_nack_bool = FALSE; + + } else { + crm_debug("Accepting join-%d request from %s (with better CIB " + "generation than current best from %s) " CRM_XS " ref=%s", + join_id, join_from, max_generation_from, ref); + crm_log_xml_debug(max_generation_xml, "Old max generation"); + crm_log_xml_debug(generation, "New max generation"); + + free_xml(max_generation_xml); + max_generation_xml = copy_xml(join_ack->xml); + pcmk__str_update(&max_generation_from, join_from); + } + + } else { + crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", + join_id, join_from, ref); + } + + if (!ack_nack_bool) { + if (compare_version(join_version, "3.17.0") < 0) { + /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely + * after a nack message, don't send one + */ + crm_update_peer_join(__func__, join_node, crm_join_nack_quiet); + } else { + crm_update_peer_join(__func__, join_node, crm_join_nack); + } + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); + + } else { + crm_update_peer_join(__func__, join_node, crm_join_integrated); + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); + } + + count = crmd_join_phase_count(crm_join_integrated); + crm_debug("%d node%s currently integrated in join-%d", + count, pcmk__plural_s(count), join_id); + + if (check_join_state(cur_state, __func__) == FALSE) { + // Don't waste time by invoking the scheduler yet + count = crmd_join_phase_count(crm_join_welcomed); + crm_debug("Waiting on join-%d requests from %d outstanding node%s", + join_id, count, pcmk__plural_s(count)); + } +} + +/* A_DC_JOIN_FINALIZE */ +void +do_dc_join_finalize(long long action, + enum 
crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + char *sync_from = NULL; + int rc = pcmk_ok; + int count_welcomed = crmd_join_phase_count(crm_join_welcomed); + int count_finalizable = crmd_join_phase_count(crm_join_integrated) + + crmd_join_phase_count(crm_join_nack) + + crmd_join_phase_count(crm_join_nack_quiet); + + /* This we can do straight away and avoid clients timing us out + * while we compute the latest CIB + */ + if (count_welcomed != 0) { + crm_debug("Waiting on join-%d requests from %d outstanding node%s " + "before finalizing join", current_join_id, count_welcomed, + pcmk__plural_s(count_welcomed)); + crmd_join_phase_log(LOG_DEBUG); + /* crmd_fsa_stall(FALSE); Needed? */ + return; + + } else if (count_finalizable == 0) { + crm_debug("Finalization not needed for join-%d at the current time", + current_join_id); + crmd_join_phase_log(LOG_DEBUG); + check_join_state(controld_globals.fsa_state, __func__); + return; + } + + controld_clear_fsa_input_flags(R_HAVE_CIB); + if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename, + pcmk__str_null_matches|pcmk__str_casei)) { + controld_set_fsa_input_flags(R_HAVE_CIB); + } + + if (!controld_globals.transition_graph->complete) { + crm_warn("Delaying join-%d finalization while transition in progress", + current_join_id); + crmd_join_phase_log(LOG_DEBUG); + crmd_fsa_stall(FALSE); + return; + } + + if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + // Send our CIB out to everyone + pcmk__str_update(&sync_from, controld_globals.our_nodename); + crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", + current_join_id, count_finalizable, + pcmk__plural_s(count_finalizable)); + crm_log_xml_debug(max_generation_xml, "Requested CIB version"); + + } else { + // Ask for the agreed best CIB + pcmk__str_update(&sync_from, max_generation_from); + crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", + current_join_id, count_finalizable, + pcmk__plural_s(count_finalizable), sync_from); + crm_log_xml_notice(max_generation_xml, "Requested CIB version"); + } + crmd_join_phase_log(LOG_DEBUG); + + rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, + sync_from, NULL, cib_none); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + controld_record_cib_replace_call(rc); + } + fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); +} + +void +free_max_generation(void) +{ + free(max_generation_from); + max_generation_from = NULL; + + free_xml(max_generation_xml); + max_generation_xml = NULL; +} + +void +finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + CRM_LOG_ASSERT(-EPERM != rc); + + controld_forget_cib_replace_call(call_id); + + if (rc != pcmk_ok) { + const char *sync_from = (const char *) user_data; + + do_crm_log(((rc == -pcmk_err_old_data)? 
LOG_WARNING : LOG_ERR), + "Could not sync CIB from %s in join-%d: %s", + sync_from, current_join_id, pcmk_strerror(rc)); + + if (rc != -pcmk_err_old_data) { + record_failed_sync_node(sync_from, current_join_id); + } + + /* restart the whole join process */ + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, + __func__); + + } else if (!AM_I_DC) { + crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); + + } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { + crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " + "(%s)", current_join_id, + fsa_state2string(controld_globals.fsa_state)); + + } else { + controld_set_fsa_input_flags(R_HAVE_CIB); + + /* make sure dc_uuid is re-set to us */ + if (!check_join_state(controld_globals.fsa_state, __func__)) { + int count_finalizable = 0; + + count_finalizable = crmd_join_phase_count(crm_join_integrated) + + crmd_join_phase_count(crm_join_nack) + + crmd_join_phase_count(crm_join_nack_quiet); + + crm_debug("Notifying %d node%s of join-%d results", + count_finalizable, pcmk__plural_s(count_finalizable), + current_join_id); + g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); + } + } +} + +static void +join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_debug("join-%d node history update (via CIB call %d) complete", + current_join_id, call_id); + check_join_state(controld_globals.fsa_state, __func__); + + } else { + crm_err("join-%d node history update (via CIB call %d) failed: %s " + "(next transition may determine resource status incorrectly)", + current_join_id, call_id, pcmk_strerror(rc)); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +/* A_DC_JOIN_PROCESS_ACK */ +void +do_dc_join_ack(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + int join_id = -1; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + enum controld_section_e section = controld_section_lrm; + const int cib_opts = cib_scope_local|cib_can_create; + + const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + crm_node_t *peer = NULL; + + // Sanity checks + if (join_from == NULL) { + crm_warn("Ignoring message received without node identification"); + return; + } + if (op == NULL) { + crm_warn("Ignoring message received from %s without task", join_from); + return; + } + + if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { + crm_debug("Ignoring '%s' message from %s while waiting for '%s'", + op, join_from, CRM_OP_JOIN_CONFIRM); + return; + } + + if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { + crm_warn("Ignoring join confirmation from %s without valid join ID", + join_from); + return; + } + + peer = crm_get_peer(0, join_from); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", + join_id, join_from, crm_join_phase_str(peer->join), + crm_join_phase_str(crm_join_finalized)); + return; + } + + if (join_id != current_join_id) { + crm_err("Rejecting join-%d confirmation from %s " + "because currently on join-%d", + join_id, join_from, current_join_id); + crm_update_peer_join(__func__, peer, crm_join_nack); + return; + } + + crm_update_peer_join(__func__, peer, 
crm_join_confirmed); + + /* Update CIB with node's current executor state. A new transition will be + * triggered later, when the CIB notifies us of the change. + */ + if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + section = controld_section_lrm_unlocked; + } + controld_delete_node_state(join_from, section, cib_scope_local); + if (pcmk__str_eq(join_from, controld_globals.our_nodename, + pcmk__str_casei)) { + xmlNode *now_dc_lrmd_state = controld_query_executor_state(); + + if (now_dc_lrmd_state != NULL) { + crm_debug("Updating local node history for join-%d " + "from query result", join_id); + controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts, + join_update_complete_callback); + free_xml(now_dc_lrmd_state); + } else { + crm_warn("Updating local node history from join-%d confirmation " + "because query failed", join_id); + controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, + join_update_complete_callback); + } + } else { + crm_debug("Updating node history for %s from join-%d confirmation", + join_from, join_id); + controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, + join_update_complete_callback); + } +} + +void +finalize_join_for(gpointer key, gpointer value, gpointer user_data) +{ + xmlNode *acknak = NULL; + xmlNode *tmp1 = NULL; + crm_node_t *join_node = value; + const char *join_to = join_node->uname; + bool integrated = false; + + switch (join_node->join) { + case crm_join_integrated: + integrated = true; + break; + case crm_join_nack: + case crm_join_nack_quiet: + break; + default: + crm_trace("Not updating non-integrated and non-nacked node %s (%s) " + "for join-%d", join_to, + crm_join_phase_str(join_node->join), current_join_id); + return; + } + + /* Update the <node> element with the node's name and UUID, in case they + * weren't known before + */ + crm_trace("Updating node name and UUID in CIB for %s", join_to); + tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); + set_uuid(tmp1, XML_ATTR_ID, join_node); + crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); + fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); + free_xml(tmp1); + + if (join_node->join == crm_join_nack_quiet) { + crm_trace("Not sending nack message to node %s with feature set older " + "than 3.17.0", join_to); + return; + } + + join_node = crm_get_peer(0, join_to); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet + * simply creates more churn + * + * Better to leave them waiting and let the join restart when + * the new membership event comes in + * + * All other NACKs (due to versions etc) should still be processed + */ + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); + return; + } + + // Acknowledge or nack node's join request + crm_debug("%sing join-%d request from %s", + integrated? "Acknowledg" : "Nack", current_join_id, join_to); + acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); + pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); + + if (integrated) { + // No change needed for a nacked node + crm_update_peer_join(__func__, join_node, crm_join_finalized); + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); + + /* Iterate through the remote peer cache and add information on which + * node hosts each to the ACK message. This keeps new controllers in + * sync with what has already happened. 
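+ *
+ * Each <node> entry carries the remote node's name (XML_ATTR_ID), its
+ * state (XML_CIB_TAG_STATE), and the cluster node currently hosting its
+ * connection (PCMK__XA_CONN_HOST); the joining controller unpacks these
+ * in update_conn_host_cache().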
+ */ + if (crm_remote_peer_cache_size() != 0) { + GHashTableIter iter; + crm_node_t *node = NULL; + xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES); + + g_hash_table_iter_init(&iter, crm_remote_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *remote = NULL; + + if (!node->conn_host) { + continue; + } + + remote = create_xml_node(remotes, XML_CIB_TAG_NODE); + pcmk__xe_set_props(remote, + XML_ATTR_ID, node->uname, + XML_CIB_TAG_STATE, node->state, + PCMK__XA_CONN_HOST, node->conn_host, + NULL); + } + } + } + send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE); + free_xml(acknak); + return; +} + +gboolean +check_join_state(enum crmd_fsa_state cur_state, const char *source) +{ + static unsigned long long highest_seq = 0; + + if (controld_globals.membership_id != crm_peer_seq) { + crm_debug("join-%d: Membership changed from %llu to %llu " + CRM_XS " highest=%llu state=%s for=%s", + current_join_id, controld_globals.membership_id, crm_peer_seq, + highest_seq, fsa_state2string(cur_state), source); + if(highest_seq < crm_peer_seq) { + /* Don't spam the FSA with duplicates */ + highest_seq = crm_peer_seq; + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + } + + } else if (cur_state == S_INTEGRATION) { + if (crmd_join_phase_count(crm_join_welcomed) == 0) { + int count = crmd_join_phase_count(crm_join_integrated); + + crm_debug("join-%d: Integration of %d peer%s complete " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); + return TRUE; + } + + } else if (cur_state == S_FINALIZE_JOIN) { + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + crm_debug("join-%d: Delaying finalization until we have CIB " + CRM_XS " state=%s for=%s", + current_join_id, fsa_state2string(cur_state), source); + return TRUE; + + } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { + int count = crmd_join_phase_count(crm_join_welcomed); + + crm_debug("join-%d: Still waiting on %d welcomed node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_integrated) != 0) { + int count = crmd_join_phase_count(crm_join_integrated); + + crm_debug("join-%d: Still waiting on %d integrated node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_finalized) != 0) { + int count = crmd_join_phase_count(crm_join_finalized); + + crm_debug("join-%d: Still waiting on %d finalized node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else { + crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", + current_join_id, fsa_state2string(cur_state), source); + register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); + return TRUE; + } + } + + return FALSE; +} + +void +do_dc_join_final(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); + crm_update_quorum(crm_have_quorum, TRUE); +} + +int crmd_join_phase_count(enum crm_join_phase phase) +{ + int 
count = 0; + crm_node_t *peer; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + if(peer->join == phase) { + count++; + } + } + return count; +} + +void crmd_join_phase_log(int level) +{ + crm_node_t *peer; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, + crm_join_phase_str(peer->join)); + } +} diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h new file mode 100644 index 0000000..25f3db3 --- /dev/null +++ b/daemons/controld/controld_lrm.h @@ -0,0 +1,188 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ +#ifndef CONTROLD_LRM__H +# define CONTROLD_LRM__H + +#include <controld_messages.h> + +extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level); +void lrm_clear_last_failure(const char *rsc_id, const char *node_name, + const char *operation, guint interval_ms); +void lrm_op_callback(lrmd_event_data_t * op); +lrmd_t *crmd_local_lrmd_conn(void); + +typedef struct resource_history_s { + char *id; + uint32_t last_callid; + lrmd_rsc_info_t rsc; + lrmd_event_data_t *last; + lrmd_event_data_t *failed; + GList *recurring_op_list; + + /* Resources must be stopped using the same + * parameters they were started with. This hashtable + * holds the parameters that should be used for the next stop + * cmd on this resource. */ + GHashTable *stop_params; +} rsc_history_t; + +void history_free(gpointer data); + +enum active_op_e { + active_op_remove = (1 << 0), + active_op_cancelled = (1 << 1), +}; + +// In-flight action (recurring or pending) +typedef struct active_op_s { + guint interval_ms; + int call_id; + uint32_t flags; // bitmask of active_op_e + time_t start_time; + time_t lock_time; + char *rsc_id; + char *op_type; + char *op_key; + char *user_data; + GHashTable *params; +} active_op_t; + +#define controld_set_active_op_flags(active_op, flags_to_set) do { \ + (active_op)->flags = pcmk__set_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Active operation", (active_op)->op_key, \ + (active_op)->flags, (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_active_op_flags(active_op, flags_to_clear) do { \ + (active_op)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Active operation", (active_op)->op_key, \ + (active_op)->flags, (flags_to_clear), #flags_to_clear); \ + } while (0) + +typedef struct lrm_state_s { + const char *node_name; + void *conn; // Reserved for controld_execd_state.c usage + void *remote_ra_data; // Reserved for controld_remote_ra.c usage + + GHashTable *resource_history; + GHashTable *active_ops; // Pending and recurring actions + GHashTable *deletion_ops; + GHashTable *rsc_info_cache; + GHashTable *metadata_cache; // key = class[:provider]:agent, value = ra_metadata_s + + int num_lrm_register_fails; +} lrm_state_t; + +struct pending_deletion_op_s { + char *rsc; + ha_msg_input_t *input; +}; + +/*! + * \brief Check whether this is the local IPC connection to the executor + */ +gboolean +lrm_state_is_local(lrm_state_t *lrm_state); + +/*! + * \brief Clear all state information from a single state entry. 
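+ *
+ * A caller that wants to flush the per-resource tables while keeping agent
+ * metadata might do the following (illustrative sketch; "node1" is a
+ * placeholder node name):
+ * \code
+ * lrm_state_t *lrm_state = lrm_state_find("node1");
+ *
+ * if (lrm_state != NULL) {
+ *     // FALSE preserves the metadata cache; TRUE would clear it as well
+ *     lrm_state_reset_tables(lrm_state, FALSE);
+ * }
+ * \endcode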
+ * \note It is sometimes useful to save the metadata cache when it won't go stale. + * \note This does not close the executor connection. + */ +void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata); +GList *lrm_state_get_list(void); + +/*! + * \brief Initialize internal state tables + */ +gboolean lrm_state_init_local(void); + +/*! + * \brief Destroy all state entries and internal state tables + */ +void lrm_state_destroy_all(void); + +/*! + * \brief Destroy executor connection by node name + */ +void lrm_state_destroy(const char *node_name); + +/*! + * \brief Find lrm_state data by node name + */ +lrm_state_t *lrm_state_find(const char *node_name); + +/*! + * \brief Either find or create a new entry + */ +lrm_state_t *lrm_state_find_or_create(const char *node_name); + +/*! + * The functions below are wrappers for the executor API that the controller + * uses. These wrapper functions allow us to treat the controller's remote + * executor connection resources the same as regular resources. Internally, + * regular resources go to the executor, and remote connection resources are + * handled locally in the controller. + */ +void lrm_state_disconnect_only(lrm_state_t * lrm_state); +void lrm_state_disconnect(lrm_state_t * lrm_state); +int controld_connect_local_executor(lrm_state_t *lrm_state); +int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, + int port, int timeout); +int lrm_state_is_connected(lrm_state_t * lrm_state); +int lrm_state_poke_connection(lrm_state_t * lrm_state); + +int lrm_state_get_metadata(lrm_state_t * lrm_state, + const char *class, + const char *provider, + const char *agent, char **output, enum lrmd_call_options options); +int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms); +int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, + GHashTable *parameters, int *call_id); +lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state, + const char *rsc_id, enum lrmd_call_options options); +int lrm_state_register_rsc(lrm_state_t * lrm_state, + const char *rsc_id, + const char *class, + const char *provider, const char *agent, enum lrmd_call_options options); +int lrm_state_unregister_rsc(lrm_state_t * lrm_state, + const char *rsc_id, enum lrmd_call_options options); + +// Functions used to manage remote executor connection resources +void remote_lrm_op_callback(lrmd_event_data_t * op); +gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id); +lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id); +int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms); +int controld_execute_remote_agent(const lrm_state_t *lrm_state, + const char *rsc_id, const char *action, + const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, lrmd_key_value_t *params, + int *call_id); +void remote_ra_cleanup(lrm_state_t * lrm_state); +void remote_ra_fail(const char *node_name); +void remote_ra_process_pseudo(xmlNode *xml); +gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state); +void remote_ra_process_maintenance_nodes(xmlNode *xml); +gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); + +void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + active_op_t *pending, const xmlNode *action_xml); +void 
controld_ack_event_directly(const char *to_host, const char *to_sys, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, const char *rsc_id); +void controld_rc2event(lrmd_event_data_t *event, int rc); +void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id); + +#endif diff --git a/daemons/controld/controld_matrix.c b/daemons/controld/controld_matrix.c new file mode 100644 index 0000000..a404f0a --- /dev/null +++ b/daemons/controld/controld_matrix.c @@ -0,0 +1,1250 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdint.h> // uint64_t + +#include <pacemaker-controld.h> + +/* + * The state transition table. The rows are inputs, and + * the columns are states. + */ +static const enum crmd_fsa_state fsa_next_states[MAXINPUT][MAXSTATE] = { +/* Got an I_NULL */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_CIB_OP */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_CIB_UPDATE */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_DC_TIMEOUT */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_ELECTION, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RECOVERY, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_ELECTION, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_ELECTION, + }, + +/* Got an I_ELECTION */ + { + /* S_IDLE ==> */ S_ELECTION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_ELECTION, + /* S_FINALIZE_JOIN ==> */ S_ELECTION, + /* S_NOT_DC ==> */ S_ELECTION, + /* S_POLICY_ENGINE ==> */ S_ELECTION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC 
==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_ELECTION, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_ELECTION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_PE_CALC */ + { + /* S_IDLE ==> */ S_POLICY_ENGINE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_DC */ + { + /* S_IDLE ==> */ S_RELEASE_DC, + /* S_ELECTION ==> */ S_RELEASE_DC, + /* S_INTEGRATION ==> */ S_RELEASE_DC, + /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RELEASE_DC, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ELECTION_DC */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_INTEGRATION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_INTEGRATION, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ERROR */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RECOVERY, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RECOVERY, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_TERMINATE, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_RECOVERY, + }, + +/* Got an I_FAIL */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RELEASE_DC, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_STOPPING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_RELEASE_DC, + }, + +/* Got an I_INTEGRATED */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_FINALIZE_JOIN, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ 
S_HALT, + }, + +/* Got an I_FINALIZED */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_NODE_JOIN */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_NOT_DC */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_NOT_DC, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RECOVERED */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_PENDING, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_FAIL */ + { + /* S_IDLE ==> */ S_STOPPING, + /* S_ELECTION ==> */ S_STOPPING, + /* S_INTEGRATION ==> */ S_STOPPING, + /* S_FINALIZE_JOIN ==> */ S_STOPPING, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_STOPPING, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_STOPPING, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_STOPPING, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_SUCCESS */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_PENDING, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RESTART */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY 
==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_TE_SUCCESS */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_IDLE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ROUTER */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_SHUTDOWN */ + { + /* S_IDLE ==> */ S_POLICY_ENGINE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_ELECTION, + }, + +/* Got an I_STOP */ + { + /* S_IDLE ==> */ S_STOPPING, + /* S_ELECTION ==> */ S_STOPPING, + /* S_INTEGRATION ==> */ S_STOPPING, + /* S_FINALIZE_JOIN ==> */ S_STOPPING, + /* S_NOT_DC ==> */ S_STOPPING, + /* S_POLICY_ENGINE ==> */ S_STOPPING, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_STOPPING, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_STOPPING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_STOPPING, + /* S_HALT ==> */ S_STOPPING, + }, + +/* Got an I_TERMINATE */ + { + /* S_IDLE ==> */ S_TERMINATE, + /* S_ELECTION ==> */ S_TERMINATE, + /* S_INTEGRATION ==> */ S_TERMINATE, + /* S_FINALIZE_JOIN ==> */ S_TERMINATE, + /* S_NOT_DC ==> */ S_TERMINATE, + /* S_POLICY_ENGINE ==> */ S_TERMINATE, + /* S_RECOVERY ==> */ S_TERMINATE, + /* S_RELEASE_DC ==> */ S_TERMINATE, + /* S_STARTING ==> */ S_TERMINATE, + /* S_PENDING ==> */ S_TERMINATE, + /* S_STOPPING ==> */ S_TERMINATE, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TERMINATE, + /* S_HALT ==> */ S_TERMINATE, + }, + +/* Got an I_STARTUP */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RECOVERY, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> 
*/ S_HALT, + }, + +/* Got an I_PE_SUCCESS */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_OFFER */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_REQUEST */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_RESULT */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_WAIT_FOR_EVENT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_DC_HEARTBEAT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_LRM_EVENT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* 
S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_PENDING */ + { + /* S_IDLE ==> */ S_PENDING, + /* S_ELECTION ==> */ S_PENDING, + /* S_INTEGRATION ==> */ S_PENDING, + /* S_FINALIZE_JOIN ==> */ S_PENDING, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_PENDING, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_PENDING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_PENDING, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_HALT */ + { + /* S_IDLE ==> */ S_HALT, + /* S_ELECTION ==> */ S_HALT, + /* S_INTEGRATION ==> */ S_HALT, + /* S_FINALIZE_JOIN ==> */ S_HALT, + /* S_NOT_DC ==> */ S_HALT, + /* S_POLICY_ENGINE ==> */ S_HALT, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_HALT, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_HALT, + /* S_HALT ==> */ S_HALT, + }, +}; + +/* + * The action table. Each entry is a set of actions to take or-ed + * together. Like the state table, the rows are inputs, and + * the columns are states. + */ + +/* NOTE: In the fsa, the actions are extracted then state is updated. */ + +static const uint64_t fsa_actions[MAXINPUT][MAXSTATE] = { + +/* Got an I_NULL */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_NOTHING, + /* S_INTEGRATION ==> */ A_NOTHING, + /* S_FINALIZE_JOIN ==> */ A_NOTHING, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_NOTHING, + /* S_STARTING ==> */ A_NOTHING, + /* S_PENDING ==> */ A_NOTHING, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_NOTHING, + }, + +/* Got an I_CIB_OP */ + { + /* S_IDLE ==> */ A_ERROR, + /* S_ELECTION ==> */ A_ERROR, + /* S_INTEGRATION ==> */ A_ERROR, + /* S_FINALIZE_JOIN ==> */ A_ERROR, + /* S_NOT_DC ==> */ A_ERROR, + /* S_POLICY_ENGINE ==> */ A_ERROR, + /* S_RECOVERY ==> */ A_ERROR, + /* S_RELEASE_DC ==> */ A_ERROR, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_ERROR, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_ERROR, + /* S_HALT ==> */ A_ERROR, + }, + +/* Got an I_CIB_UPDATE */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_LOG, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_DC_TIMEOUT */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_ELECTION_VOTE | A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ 
A_WARN, + /* S_PENDING ==> */ A_ELECTION_VOTE | A_WARN, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL | A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ELECTION */ + { + /* S_IDLE ==> */ A_ELECTION_VOTE, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_ELECTION_VOTE, + /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE, + /* S_NOT_DC ==> */ A_ELECTION_VOTE, + /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_ELECTION_VOTE, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE, + /* S_HALT ==> */ A_ELECTION_VOTE, + }, + +/* Got an I_PE_CALC */ + { + /* S_IDLE ==> */ A_PE_INVOKE, + /* S_ELECTION ==> */ A_NOTHING, + /* S_INTEGRATION ==> */ A_NOTHING, + /* S_FINALIZE_JOIN ==> */ A_NOTHING, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_PE_INVOKE, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_NOTHING, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE, + /* S_HALT ==> */ A_ERROR, + }, + +/* Got an I_RELEASE_DC */ + { + /* S_IDLE ==> */ O_RELEASE, + /* S_ELECTION ==> */ O_RELEASE, + /* S_INTEGRATION ==> */ O_RELEASE | A_WARN, + /* S_FINALIZE_JOIN ==> */ O_RELEASE | A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ O_RELEASE | A_WARN, + /* S_RECOVERY ==> */ O_RELEASE, + /* S_RELEASE_DC ==> */ O_RELEASE | A_WARN, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ O_RELEASE | A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ELECTION_DC */ + { + /* S_IDLE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_ELECTION ==> */ + A_LOG | A_DC_TAKEOVER | A_PE_START | A_TE_START | A_DC_JOIN_OFFER_ALL | A_DC_TIMER_STOP, + /* S_INTEGRATION ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, + /* S_FINALIZE_JOIN ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, + /* S_NOT_DC ==> */ A_LOG | A_ELECTION_VOTE, + /* S_POLICY_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN | A_ELECTION_VOTE, + /* S_STARTING ==> */ A_LOG | A_WARN, + /* S_PENDING ==> */ A_LOG | A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ERROR */ + { + /* S_IDLE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_ELECTION ==> */ A_ERROR | A_RECOVER | O_RELEASE, + /* S_INTEGRATION ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_FINALIZE_JOIN ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_NOT_DC ==> */ A_ERROR | A_RECOVER, + /* S_POLICY_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, + /* S_RELEASE_DC ==> */ A_ERROR | A_RECOVER, + /* S_STARTING ==> */ A_ERROR | A_RECOVER, + /* S_PENDING ==> */ A_ERROR | A_RECOVER, + /* S_STOPPING ==> */ A_ERROR | A_EXIT_1, + /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_HALT ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + }, + +/* Got an I_FAIL */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* 
S_INTEGRATION ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, + /* S_FINALIZE_JOIN ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN | A_DC_JOIN_OFFER_ALL | A_TE_CANCEL, + /* S_RECOVERY ==> */ A_WARN | O_RELEASE, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_WARN | O_LRM_RECONNECT | A_PE_INVOKE | A_TE_CANCEL, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_INTEGRATED */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_FINALIZED */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL | A_TE_CANCEL, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_NODE_JOIN */ + { + /* S_IDLE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_NOT_DC */ + { + /* S_IDLE ==> */ A_WARN | O_RELEASE, + /* S_ELECTION ==> */ A_ERROR | A_ELECTION_START | A_DC_TIMER_STOP, + /* S_INTEGRATION ==> */ A_ERROR | O_RELEASE, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE, + /* S_NOT_DC ==> */ A_LOG, + /* S_POLICY_ENGINE ==> */ A_ERROR | O_RELEASE, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, + /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_LOG | A_DC_TIMER_STOP, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_RELEASE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RECOVERED */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RELEASE_FAIL */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_RELEASE_DC 
==> */ A_NOTHING, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RELEASE_SUCCESS */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RESTART */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_LOG | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_LOG | A_DC_JOIN_OFFER_ALL, + /* S_FINALIZE_JOIN ==> */ A_LOG | A_DC_JOIN_FINALIZE, + /* S_NOT_DC ==> */ A_LOG | A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_LOG | A_PE_INVOKE, + /* S_RECOVERY ==> */ A_LOG | A_RECOVER | O_RELEASE, + /* S_RELEASE_DC ==> */ A_LOG | O_RELEASE, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_LOG | A_TE_INVOKE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_TE_SUCCESS */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_ERROR, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_RECOVER | A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_ERROR, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ROUTER */ + { + /* S_IDLE ==> */ A_MSG_ROUTE, + /* S_ELECTION ==> */ A_MSG_ROUTE, + /* S_INTEGRATION ==> */ A_MSG_ROUTE, + /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE, + /* S_NOT_DC ==> */ A_MSG_ROUTE, + /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE, + /* S_RECOVERY ==> */ A_MSG_ROUTE, + /* S_RELEASE_DC ==> */ A_MSG_ROUTE, + /* S_STARTING ==> */ A_MSG_ROUTE, + /* S_PENDING ==> */ A_MSG_ROUTE, + /* S_STOPPING ==> */ A_MSG_ROUTE, + /* S_TERMINATE ==> */ A_MSG_ROUTE, + /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE, + /* S_HALT ==> */ A_WARN | A_MSG_ROUTE, + }, + +/* Got an I_SHUTDOWN */ + { + /* S_IDLE ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_ELECTION ==> */ A_LOG | A_SHUTDOWN_REQ | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_FINALIZE_JOIN ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_NOT_DC ==> */ A_SHUTDOWN_REQ, + /* S_POLICY_ENGINE ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_RECOVERY ==> */ A_WARN | O_EXIT | O_RELEASE, + /* S_RELEASE_DC ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_STARTING ==> */ A_WARN | O_EXIT, + /* S_PENDING ==> */ A_SHUTDOWN_REQ, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_HALT ==> */ A_WARN | A_ELECTION_START | A_SHUTDOWN_REQ, + }, + +/* Got an I_STOP */ + { + /* S_IDLE ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_ELECTION ==> */ O_RELEASE | O_EXIT, + /* S_INTEGRATION ==> */ A_WARN | O_RELEASE | O_EXIT, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_NOT_DC ==> */ O_EXIT, + /* S_POLICY_ENGINE ==> */ A_WARN | O_RELEASE | O_EXIT, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_STARTING ==> */ O_EXIT, + /* S_PENDING ==> */ 
O_EXIT, + /* S_STOPPING ==> */ O_EXIT, + /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_LOG | O_RELEASE | O_EXIT, + /* S_HALT ==> */ O_RELEASE | O_EXIT | A_WARN, + }, + +/* Got an I_TERMINATE */ + { + /* S_IDLE ==> */ A_ERROR | O_EXIT, + /* S_ELECTION ==> */ A_ERROR | O_EXIT, + /* S_INTEGRATION ==> */ A_ERROR | O_EXIT, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_EXIT, + /* S_NOT_DC ==> */ A_ERROR | O_EXIT, + /* S_POLICY_ENGINE ==> */ A_ERROR | O_EXIT, + /* S_RECOVERY ==> */ A_ERROR | O_EXIT, + /* S_RELEASE_DC ==> */ A_ERROR | O_EXIT, + /* S_STARTING ==> */ O_EXIT, + /* S_PENDING ==> */ A_ERROR | O_EXIT, + /* S_STOPPING ==> */ O_EXIT, + /* S_TERMINATE ==> */ O_EXIT, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_EXIT, + /* S_HALT ==> */ A_ERROR | O_EXIT, + }, + +/* Got an I_STARTUP */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ + A_LOG | A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_PE_SUCCESS */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_TE_INVOKE, + /* S_RECOVERY ==> */ A_RECOVER | A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_OFFER */ + { + /* S_IDLE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST, + /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST, + /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_POLICY_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_RECOVERY ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_RELEASE_DC ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_REQUEST */ + { + /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_RESULT */ + { + /* S_IDLE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_LOG | A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, + /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, + /* S_NOT_DC ==> */ A_ERROR | A_CL_JOIN_ANNOUNCE, + /* S_POLICY_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_RECOVERY ==> */ 
A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_CL_JOIN_RESULT, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_WAIT_FOR_EVENT */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_LOG, + /* S_FINALIZE_JOIN ==> */ A_LOG, + /* S_NOT_DC ==> */ A_LOG, + /* S_POLICY_ENGINE ==> */ A_LOG, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_DC_HEARTBEAT */ + { + /* S_IDLE ==> */ A_ERROR, + /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_ERROR, + /* S_FINALIZE_JOIN ==> */ A_ERROR, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_ERROR, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG | A_CL_JOIN_ANNOUNCE, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_ERROR, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_LRM_EVENT */ + { + /* S_IDLE ==> */ A_LRM_EVENT, + /* S_ELECTION ==> */ A_LRM_EVENT, + /* S_INTEGRATION ==> */ A_LRM_EVENT, + /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT, + /* S_NOT_DC ==> */ A_LRM_EVENT, + /* S_POLICY_ENGINE ==> */ A_LRM_EVENT, + /* S_RECOVERY ==> */ A_LRM_EVENT, + /* S_RELEASE_DC ==> */ A_LRM_EVENT, + /* S_STARTING ==> */ A_LRM_EVENT, + /* S_PENDING ==> */ A_LRM_EVENT, + /* S_STOPPING ==> */ A_LRM_EVENT, + /* S_TERMINATE ==> */ A_LRM_EVENT, + /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT, + /* S_HALT ==> */ A_WARN, + }, + +/* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */ +/* Got an I_PENDING */ + { + /* S_IDLE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_ELECTION ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_INTEGRATION ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_FINALIZE_JOIN ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_NOT_DC ==> */ A_LOG | O_DC_TIMER_RESTART, + /* S_POLICY_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN | O_DC_TIMER_RESTART, + /* S_STARTING ==> */ A_LOG | A_DC_TIMER_START | A_CL_JOIN_QUERY, + /* S_PENDING ==> */ A_LOG | O_DC_TIMER_RESTART, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_HALT */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, +}; + +/*! + * \internal + * \brief Get the next FSA state given an input and the current state + * + * \param[in] input FSA input + * + * \return The next FSA state + */ +enum crmd_fsa_state +controld_fsa_get_next_state(enum crmd_fsa_input input) +{ + return fsa_next_states[input][controld_globals.fsa_state]; +} + +/*! 
+ * \internal + * \brief Get the appropriate FSA action given an input and the current state + * + * \param[in] input FSA input + * + * \return The appropriate FSA action + */ +uint64_t +controld_fsa_get_action(enum crmd_fsa_input input) +{ + return fsa_actions[input][controld_globals.fsa_state]; +} diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c new file mode 100644 index 0000000..1f7e4c0 --- /dev/null +++ b/daemons/controld/controld_membership.c @@ -0,0 +1,457 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +/* put these first so that uuid_t is defined without conflicts */ +#include <crm_internal.h> + +#include <string.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> +#include <crm/cluster/internal.h> + +#include <pacemaker-controld.h> + +void post_cache_update(int instance); + +extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +static void +reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) +{ + crm_node_t *node = value; + + if (crm_is_peer_active(node) == FALSE) { + crm_update_peer_join(__func__, node, crm_join_none); + + if(node && node->uname) { + if (pcmk__str_eq(controld_globals.our_nodename, node->uname, + pcmk__str_casei)) { + crm_err("We're not part of the cluster anymore"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + + } else if (!AM_I_DC + && pcmk__str_eq(node->uname, controld_globals.dc_name, + pcmk__str_casei)) { + crm_warn("Our DC node (%s) left the cluster", node->uname); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + } + } + + if ((controld_globals.fsa_state == S_INTEGRATION) + || (controld_globals.fsa_state == S_FINALIZE_JOIN)) { + check_join_state(controld_globals.fsa_state, __func__); + } + if ((node != NULL) && (node->uuid != NULL)) { + fail_incompletable_actions(controld_globals.transition_graph, + node->uuid); + } + } +} + +void +post_cache_update(int instance) +{ + xmlNode *no_op = NULL; + + crm_peer_seq = instance; + crm_debug("Updated cache after membership event %d.", instance); + + g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); + controld_set_fsa_input_flags(R_MEMBERSHIP); + + if (AM_I_DC) { + populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | + node_update_expected, __func__); + } + + /* + * If we lost nodes, we should re-check the election status + * Safe to call outside of an election + */ + controld_set_fsa_action_flags(A_ELECTION_CHECK); + controld_trigger_fsa(); + + /* Membership changed, remind everyone we're here. + * This will aid detection of duplicate DCs + */ + no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, + AM_I_DC ? 
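+                               /* As noted above, addressing the no-op from
+                                * CRM_SYSTEM_DC when we are DC is what lets
+                                * any other active DC detect a duplicate */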
CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); + send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); + free_xml(no_op); +} + +static void +crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_trace("Node update %d complete", call_id); + + } else if(call_id < pcmk_ok) { + crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + + } else { + crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +/*! + * \internal + * \brief Create an XML node state tag with updates + * + * \param[in,out] node Node whose state will be used for update + * \param[in] flags Bitmask of node_update_flags indicating what to update + * \param[in,out] parent XML node to contain update (or NULL) + * \param[in] source Who requested the update (only used for logging) + * + * \return Pointer to created node state tag + */ +xmlNode * +create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, + const char *source) +{ + const char *value = NULL; + xmlNode *node_state; + + if (!node->state) { + crm_info("Node update for %s cancelled: no state, not seen yet", node->uname); + return NULL; + } + + node_state = create_xml_node(parent, XML_CIB_TAG_STATE); + + if (pcmk_is_set(node->flags, crm_remote_node)) { + pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true); + } + + set_uuid(node_state, XML_ATTR_ID, node); + + if (crm_element_value(node_state, XML_ATTR_ID) == NULL) { + crm_info("Node update for %s cancelled: no id", node->uname); + free_xml(node_state); + return NULL; + } + + crm_xml_add(node_state, XML_ATTR_UNAME, node->uname); + + if ((flags & node_update_cluster) && node->state) { + pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER, + pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); + } + + if (!pcmk_is_set(node->flags, crm_remote_node)) { + if (flags & node_update_peer) { + value = OFFLINESTATUS; + if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { + value = ONLINESTATUS; + } + crm_xml_add(node_state, XML_NODE_IS_PEER, value); + } + + if (flags & node_update_join) { + if (node->join <= crm_join_none) { + value = CRMD_JOINSTATE_DOWN; + } else { + value = CRMD_JOINSTATE_MEMBER; + } + crm_xml_add(node_state, XML_NODE_JOIN_STATE, value); + } + + if (flags & node_update_expected) { + crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected); + } + } + + crm_xml_add(node_state, XML_ATTR_ORIGIN, source); + + return node_state; +} + +static void +remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + xmlNode * output, void *user_data) +{ + char *node_uuid = user_data; + + do_crm_log_unlikely(rc == 0 ? 
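+                        /* success (pcmk_ok) is logged at debug level,
+                         * failure at notice */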
LOG_DEBUG : LOG_NOTICE, + "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", + node_uuid, pcmk_strerror(rc), rc); +} + +static void +search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + xmlNode * output, void *user_data) +{ + char *new_node_uuid = user_data; + xmlNode *node_xml = NULL; + + if (rc != pcmk_ok) { + if (rc != -ENXIO) { + crm_notice("Searching conflicting nodes for %s failed: %s (%d)", + new_node_uuid, pcmk_strerror(rc), rc); + } + return; + + } else if (output == NULL) { + return; + } + + if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) { + node_xml = output; + + } else { + node_xml = pcmk__xml_first_child(output); + } + + for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) { + const char *node_uuid = NULL; + const char *node_uname = NULL; + GHashTableIter iter; + crm_node_t *node = NULL; + gboolean known = FALSE; + + if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) { + continue; + } + + node_uuid = crm_element_value(node_xml, XML_ATTR_ID); + node_uname = crm_element_value(node_xml, XML_ATTR_UNAME); + + if (node_uuid == NULL || node_uname == NULL) { + continue; + } + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if (node->uuid + && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei) + && node->uname + && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) { + + known = TRUE; + break; + } + } + + if (known == FALSE) { + cib_t *cib_conn = controld_globals.cib_conn; + int delete_call_id = 0; + xmlNode *node_state_xml = NULL; + + crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s", + node_uuid, node_uname, new_node_uuid); + + delete_call_id = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_NODES, + node_xml, cib_scope_local); + fsa_register_cib_callback(delete_call_id, strdup(node_uuid), + remove_conflicting_node_callback); + + node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE); + crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid); + crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname); + + delete_call_id = cib_conn->cmds->remove(cib_conn, + XML_CIB_TAG_STATUS, + node_state_xml, + cib_scope_local); + fsa_register_cib_callback(delete_call_id, strdup(node_uuid), + remove_conflicting_node_callback); + free_xml(node_state_xml); + } + } +} + +static void +node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if(call_id < pcmk_ok) { + crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); + crm_log_xml_debug(msg, "update:failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + + } else if(rc < pcmk_ok) { + crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "update:failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +void +populate_cib_nodes(enum node_update_flags flags, const char *source) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + int call_id = 0; + gboolean from_hashtable = TRUE; + xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); + +#if SUPPORT_COROSYNC + if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) { + from_hashtable = pcmk__corosync_add_nodes(node_list); + } +#endif + + if (from_hashtable) { + GHashTableIter iter; + crm_node_t *node = NULL; + GString *xpath = NULL; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while 
(g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *new_node = NULL; + + if ((node->uuid != NULL) && (node->uname != NULL)) { + crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); + if (xpath == NULL) { + xpath = g_string_sized_new(512); + } else { + g_string_truncate(xpath, 0); + } + + /* We need both to be valid */ + new_node = create_xml_node(node_list, XML_CIB_TAG_NODE); + crm_xml_add(new_node, XML_ATTR_ID, node->uuid); + crm_xml_add(new_node, XML_ATTR_UNAME, node->uname); + + /* Search and remove unknown nodes with the conflicting uname from CIB */ + pcmk__g_strcat(xpath, + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION + "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node->uname, "']" + "[@" XML_ATTR_ID "!='", node->uuid, "']", NULL); + + call_id = cib_conn->cmds->query(cib_conn, + (const char *) xpath->str, + NULL, + cib_scope_local|cib_xpath); + fsa_register_cib_callback(call_id, strdup(node->uuid), + search_conflicting_node_callback); + } + } + + if (xpath != NULL) { + g_string_free(xpath, TRUE); + } + } + + crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster"); + + if ((controld_update_cib(XML_CIB_TAG_NODES, node_list, cib_scope_local, + node_list_update_callback) == pcmk_rc_ok) + && (crm_peer_cache != NULL) && AM_I_DC) { + /* + * There is no need to update the local CIB with our values if + * we've not seen valid membership data + */ + GHashTableIter iter; + crm_node_t *node = NULL; + + free_xml(node_list); + node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS); + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + create_node_state_update(node, flags, node_list, source); + } + + if (crm_remote_peer_cache) { + g_hash_table_iter_init(&iter, crm_remote_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + create_node_state_update(node, flags, node_list, source); + } + } + + controld_update_cib(XML_CIB_TAG_STATUS, node_list, cib_scope_local, + crmd_node_update_complete); + } + free_xml(node_list); +} + +static void +cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_trace("Quorum update %d complete", call_id); + + } else { + crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +void +crm_update_quorum(gboolean quorum, gboolean force_update) +{ + bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum); + + if (quorum) { + controld_set_global_flags(controld_ever_had_quorum); + + } else if (pcmk_all_flags_set(controld_globals.flags, + controld_ever_had_quorum + |controld_no_quorum_suicide)) { + pcmk__panic(__func__); + } + + if (AM_I_DC + && ((has_quorum && !quorum) || (!has_quorum && quorum) + || force_update)) { + xmlNode *update = NULL; + + update = create_xml_node(NULL, XML_TAG_CIB); + crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); + crm_xml_add(update, XML_ATTR_DC_UUID, controld_globals.our_uuid); + + crm_debug("Updating quorum status to %s", pcmk__btoa(quorum)); + controld_update_cib(XML_TAG_CIB, update, cib_scope_local, + cib_quorum_update_complete); + free_xml(update); + + /* Quorum changes usually cause a new transition via other activity: + * quorum gained via a node joining will abort via the node join, + * and quorum lost via a 
node leaving will usually abort via resource
+         * activity and/or fencing.
+         *
+         * However, it is possible that nothing else causes a transition (e.g.
+         * someone forces quorum via corosync-cmapctl, or quorum is lost due to
+         * a node in standby shutting down cleanly), so here ensure a new
+         * transition is triggered.
+         */
+        if (quorum) {
+            /* If quorum was gained, abort after a short delay, in case multiple
+             * nodes are joining around the same time, so the one that brings us
+             * to quorum doesn't cause all the remaining ones to be fenced.
+             */
+            abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
+                              5000);
+        } else {
+            abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
+                             NULL);
+        }
+    }
+
+    if (quorum) {
+        controld_set_global_flags(controld_has_quorum);
+    } else {
+        controld_clear_global_flags(controld_has_quorum);
+    }
+}
diff --git a/daemons/controld/controld_membership.h b/daemons/controld/controld_membership.h
new file mode 100644
index 0000000..cfe8cee
--- /dev/null
+++ b/daemons/controld/controld_membership.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef MEMBERSHIP__H
+#  define MEMBERSHIP__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <crm/cluster/internal.h>
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+void controld_destroy_failed_sync_table(void);
+void controld_remove_failed_sync_node(const char *node_name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
new file mode 100644
index 0000000..54b27ec
--- /dev/null
+++ b/daemons/controld/controld_messages.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cib.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+extern void crm_shutdown(int nsig);
+
+static enum crmd_fsa_input handle_message(xmlNode *msg,
+                                          enum crmd_fsa_cause cause);
+static void handle_response(xmlNode *stored_msg);
+static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
+                                          enum crmd_fsa_cause cause);
+static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
+static void send_msg_via_ipc(xmlNode * msg, const char *sys);
+
+/* debug only, can wrap all it likes */
+static int last_data_id = 0;
+
+void
+register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+                       fsa_data_t * cur_data, void *new_data, const char *raised_from)
+{
+    /* save the current actions if any */
+    if (controld_globals.fsa_actions != A_NOTHING) {
+        register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
+                               I_NULL, cur_data ? 
cur_data->data : NULL, + controld_globals.fsa_actions, TRUE, __func__); + } + + /* reset the action list */ + crm_info("Resetting the current action list"); + fsa_dump_actions(controld_globals.fsa_actions, "Drop"); + controld_globals.fsa_actions = A_NOTHING; + + /* register the error */ + register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); +} + +void +register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, + void *data, uint64_t with_actions, + gboolean prepend, const char *raised_from) +{ + unsigned old_len = g_list_length(controld_globals.fsa_message_queue); + fsa_data_t *fsa_data = NULL; + + if (raised_from == NULL) { + raised_from = "<unknown>"; + } + + if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { + /* no point doing anything */ + crm_err("Cannot add entry to queue: no input and no action"); + return; + } + + if (input == I_WAIT_FOR_EVENT) { + controld_set_global_flags(controld_fsa_is_stalled); + crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", + raised_from, fsa_cause2string(cause), data, old_len); + + if (old_len > 0) { + fsa_dump_queue(LOG_TRACE); + prepend = FALSE; + } + + if (data == NULL) { + controld_set_fsa_action_flags(with_actions); + fsa_dump_actions(with_actions, "Restored"); + return; + } + + /* Store everything in the new event and reset + * controld_globals.fsa_actions + */ + with_actions |= controld_globals.fsa_actions; + controld_globals.fsa_actions = A_NOTHING; + } + + last_data_id++; + crm_trace("%s %s FSA input %d (%s) due to %s, %s data", + raised_from, (prepend? "prepended" : "appended"), last_data_id, + fsa_input2string(input), fsa_cause2string(cause), + (data? "with" : "without")); + + fsa_data = calloc(1, sizeof(fsa_data_t)); + fsa_data->id = last_data_id; + fsa_data->fsa_input = input; + fsa_data->fsa_cause = cause; + fsa_data->origin = raised_from; + fsa_data->data = NULL; + fsa_data->data_type = fsa_dt_none; + fsa_data->actions = with_actions; + + if (with_actions != A_NOTHING) { + crm_trace("Adding actions %.16llx to input", + (unsigned long long) with_actions); + } + + if (data != NULL) { + switch (cause) { + case C_FSA_INTERNAL: + case C_CRMD_STATUS_CALLBACK: + case C_IPC_MESSAGE: + case C_HA_MESSAGE: + CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, + crm_err("Bogus data from %s", raised_from)); + crm_trace("Copying %s data from %s as cluster message data", + fsa_cause2string(cause), raised_from); + fsa_data->data = copy_ha_msg_input(data); + fsa_data->data_type = fsa_dt_ha_msg; + break; + + case C_LRM_OP_CALLBACK: + crm_trace("Copying %s data from %s as lrmd_event_data_t", + fsa_cause2string(cause), raised_from); + fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); + fsa_data->data_type = fsa_dt_lrm; + break; + + case C_TIMER_POPPED: + case C_SHUTDOWN: + case C_UNKNOWN: + case C_STARTUP: + crm_crit("Copying %s data (from %s) is not yet implemented", + fsa_cause2string(cause), raised_from); + crmd_exit(CRM_EX_SOFTWARE); + break; + } + } + + /* make sure to free it properly later */ + if (prepend) { + controld_globals.fsa_message_queue + = g_list_prepend(controld_globals.fsa_message_queue, fsa_data); + } else { + controld_globals.fsa_message_queue + = g_list_append(controld_globals.fsa_message_queue, fsa_data); + } + + crm_trace("FSA message queue length is %d", + g_list_length(controld_globals.fsa_message_queue)); + + /* fsa_dump_queue(LOG_TRACE); */ + + if (old_len == g_list_length(controld_globals.fsa_message_queue)) { + 
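+        /* Should not be possible: GLib aborts on allocation failure, and
+         * g_list_append()/g_list_prepend() otherwise always return a list
+         * containing the new entry, so an unchanged length can only mean an
+         * internal error.
+         */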
crm_err("Couldn't add message to the queue"); + } + + if (input != I_WAIT_FOR_EVENT) { + controld_trigger_fsa(); + } +} + +void +fsa_dump_queue(int log_level) +{ + int offset = 0; + + for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; + iter = iter->next) { + fsa_data_t *data = (fsa_data_t *) iter->data; + + do_crm_log_unlikely(log_level, + "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", + offset++, data->id, fsa_input2string(data->fsa_input), + data->origin, data->data, data->data_type, + fsa_cause2string(data->fsa_cause)); + } +} + +ha_msg_input_t * +copy_ha_msg_input(ha_msg_input_t * orig) +{ + ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t)); + + CRM_ASSERT(copy != NULL); + copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL; + copy->xml = get_message_xml(copy->msg, F_CRM_DATA); + return copy; +} + +void +delete_fsa_input(fsa_data_t * fsa_data) +{ + lrmd_event_data_t *op = NULL; + xmlNode *foo = NULL; + + if (fsa_data == NULL) { + return; + } + crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); + + if (fsa_data->data != NULL) { + switch (fsa_data->data_type) { + case fsa_dt_ha_msg: + delete_ha_msg_input(fsa_data->data); + break; + + case fsa_dt_xml: + foo = fsa_data->data; + free_xml(foo); + break; + + case fsa_dt_lrm: + op = (lrmd_event_data_t *) fsa_data->data; + lrmd_free_event(op); + break; + + case fsa_dt_none: + if (fsa_data->data != NULL) { + crm_err("Don't know how to free %s data from %s", + fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); + crmd_exit(CRM_EX_SOFTWARE); + } + break; + } + crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); + } + + free(fsa_data); +} + +/* returns the next message */ +fsa_data_t * +get_message(void) +{ + fsa_data_t *message + = (fsa_data_t *) controld_globals.fsa_message_queue->data; + + controld_globals.fsa_message_queue + = g_list_remove(controld_globals.fsa_message_queue, message); + crm_trace("Processing input %d", message->id); + return message; +} + +void * +fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) +{ + void *ret_val = NULL; + + if (fsa_data == NULL) { + crm_err("%s: No FSA data available", caller); + + } else if (fsa_data->data == NULL) { + crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); + + } else if (fsa_data->data_type != a_type) { + crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", + caller, fsa_data->data_type, a_type, fsa_data->origin); + CRM_ASSERT(fsa_data->data_type == a_type); + } else { + ret_val = fsa_data->data; + } + + return ret_val; +} + +/* A_MSG_ROUTE */ +void +do_msg_route(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + + route_message(msg_data->fsa_cause, input->msg); +} + +void +route_message(enum crmd_fsa_cause cause, xmlNode * input) +{ + ha_msg_input_t fsa_input; + enum crmd_fsa_input result = I_NULL; + + fsa_input.msg = input; + CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); + + /* try passing the buck first */ + if (relay_message(input, cause == C_IPC_MESSAGE)) { + return; + } + + /* handle locally */ + result = handle_message(input, cause); + + /* done or process later? 
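+     * I_NULL needs no further action, the other results listed below are
+     * prepended to the FSA queue (processed next), and anything else is
+     * appended for later processing.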
     */
+    switch (result) {
+        case I_NULL:
+        case I_CIB_OP:
+        case I_ROUTER:
+        case I_NODE_JOIN:
+        case I_JOIN_REQUEST:
+        case I_JOIN_RESULT:
+            break;
+        default:
+            /* Deferring local processing of message */
+            register_fsa_input_later(cause, result, &fsa_input);
+            return;
+    }
+
+    if (result != I_NULL) {
+        /* add to the front of the queue */
+        register_fsa_input(cause, result, &fsa_input);
+    }
+}
+
+gboolean
+relay_message(xmlNode * msg, gboolean originated_locally)
+{
+    int dest = 1;
+    bool is_for_dc = false;
+    bool is_for_dcib = false;
+    bool is_for_te = false;
+    bool is_for_crm = false;
+    bool is_for_cib = false;
+    bool is_local = false;
+    const char *host_to = NULL;
+    const char *sys_to = NULL;
+    const char *sys_from = NULL;
+    const char *type = NULL;
+    const char *task = NULL;
+    const char *ref = NULL;
+
+    /* Check for NULL before dereferencing msg in the lookups below */
+    if (msg == NULL) {
+        crm_warn("Cannot route empty message");
+        return TRUE;
+    }
+
+    host_to = crm_element_value(msg, F_CRM_HOST_TO);
+    sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+    sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+    type = crm_element_value(msg, F_TYPE);
+    task = crm_element_value(msg, F_CRM_TASK);
+    ref = crm_element_value(msg, XML_ATTR_REFERENCE);
+
+    if (ref == NULL) {
+        ref = "without reference ID";
+    }
+
+    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
+        crm_trace("No routing needed for hello message %s", ref);
+        return TRUE;
+
+    } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
+        crm_warn("Received invalid message %s: type '%s' not '" T_CRM "'",
+                 ref, pcmk__s(type, ""));
+        crm_log_xml_warn(msg, "[bad message type]");
+        return TRUE;
+
+    } else if (sys_to == NULL) {
+        crm_warn("Received invalid message %s: no subsystem", ref);
+        crm_log_xml_warn(msg, "[no subsystem]");
+        return TRUE;
+    }
+
+    is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
+    is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
+    is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
+    is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
+    is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
+
+    is_local = false;
+    if (pcmk__str_empty(host_to)) {
+        if (is_for_dc || is_for_te) {
+            is_local = false;
+
+        } else if (is_for_crm) {
+            if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
+                                     PCMK__CONTROLD_CMD_NODES, NULL)) {
+                /* Node info requests do not specify a host, which is normally
+                 * treated as "all hosts", because the whole point is that the
+                 * client may not know the local node name. Always handle these
+                 * requests locally.
+                 */
+                is_local = true;
+            } else {
+                is_local = !originated_locally;
+            }
+
+        } else {
+            is_local = true;
+        }
+
+    } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
+                            pcmk__str_casei)) {
+        is_local = true;
+    } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
+        xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
+        const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
+
+        if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+            // Local delete of an offline node's resource history
+            is_local = true;
+        }
+    }
+
+    if (is_for_dc || is_for_dcib || is_for_te) {
+        if (AM_I_DC && is_for_te) {
+            crm_trace("Route message %s locally as transition request", ref);
+            send_msg_via_ipc(msg, sys_to);
+
+        } else if (AM_I_DC) {
+            crm_trace("Route message %s locally as DC request", ref);
+            return FALSE; // More to be done by caller
+
+        } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
+                                                               CRM_SYSTEM_TENGINE, NULL)) {
+
+            if (is_corosync_cluster()) {
+                dest = text2msg_type(sys_to);
+            }
+            crm_trace("Relay message %s to DC", ref);
+            send_cluster_message(host_to ? 
crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
+
+        } else {
+            /* Neither the TE nor the scheduler should be sending messages
+             * to DCs on other nodes. By definition, if we are no longer the DC,
+             * then the scheduler's or TE's data should be discarded.
+             */
+            crm_trace("Discard message %s because we are not DC", ref);
+        }
+
+    } else if (is_local && (is_for_crm || is_for_cib)) {
+        crm_trace("Route message %s locally as controller request", ref);
+        return FALSE; // More to be done by caller
+
+    } else if (is_local) {
+        crm_trace("Relay message %s locally to %s",
+                  ref, (sys_to? sys_to : "unknown client"));
+        crm_log_xml_trace(msg, "[IPC relay]");
+        send_msg_via_ipc(msg, sys_to);
+
+    } else {
+        crm_node_t *node_to = NULL;
+
+        if (is_corosync_cluster()) {
+            dest = text2msg_type(sys_to);
+
+            if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
+                dest = crm_msg_crmd;
+            }
+        }
+
+        if (host_to) {
+            node_to = pcmk__search_cluster_node_cache(0, host_to);
+            if (node_to == NULL) {
+                crm_warn("Cannot route message %s: Unknown node %s",
+                         ref, host_to);
+                return TRUE;
+            }
+            crm_trace("Relay message %s to %s",
+                      ref, (node_to->uname? node_to->uname : "peer"));
+        } else {
+            crm_trace("Broadcast message %s to all peers", ref);
+        }
+        send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
+    }
+
+    return TRUE; // No further processing of message is needed
+}
+
+// Return true if field contains a nonnegative integer
+static bool
+authorize_version(xmlNode *message_data, const char *field,
+                  const char *client_name, const char *ref, const char *uuid)
+{
+    const char *version = crm_element_value(message_data, field);
+    long long version_num;
+
+    if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
+        || (version_num < 0LL)) {
+
+        crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
+                 CRM_XS " ref=%s uuid=%s",
+                 client_name, ((version == NULL)? "" : version),
+                 field, (ref? ref : "none"), uuid);
+        return false;
+    }
+    return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a client IPC message is acceptable
+ *
+ * If a given client IPC message is a hello, "authorize" it by ensuring it has
+ * valid information such as a protocol version, and return false indicating
+ * that nothing further needs to be done with the message. If the message is not
+ * a hello, just return true to indicate it needs further processing.
+ *
+ * \param[in]     client_msg     XML of IPC message
+ * \param[in,out] curr_client    If IPC is not proxied, client that sent message
+ * \param[in]     proxy_session  If IPC is proxied, the session ID
+ *
+ * \return true if message needs further processing, false if it doesn't
+ */
+bool
+controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
+                               const char *proxy_session)
+{
+    xmlNode *message_data = NULL;
+    const char *client_name = NULL;
+    const char *op = crm_element_value(client_msg, F_CRM_TASK);
+    const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE);
+    const char *uuid = (curr_client? curr_client->id : proxy_session);
+
+    if (uuid == NULL) {
+        crm_warn("IPC message from client rejected: No client identifier "
+                 CRM_XS " ref=%s", (ref? 
ref : "none"));
+        goto rejected;
+    }
+
+    if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
+        // Only hello messages need to be authorized
+        return true;
+    }
+
+    message_data = get_message_xml(client_msg, F_CRM_DATA);
+
+    client_name = crm_element_value(message_data, "client_name");
+    if (pcmk__str_empty(client_name)) {
+        crm_warn("IPC hello from client rejected: No client name "
+                 CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
+        goto rejected;
+    }
+    if (!authorize_version(message_data, "major_version", client_name, ref,
+                           uuid)) {
+        goto rejected;
+    }
+    if (!authorize_version(message_data, "minor_version", client_name, ref,
+                           uuid)) {
+        goto rejected;
+    }
+
+    crm_trace("Validated IPC hello from client %s", client_name);
+    if (curr_client) {
+        curr_client->userdata = strdup(client_name);
+    }
+    controld_trigger_fsa();
+    return false;
+
+rejected:
+    if (curr_client) {
+        qb_ipcs_disconnect(curr_client->ipcs);
+    }
+    return false;
+}
+
+static enum crmd_fsa_input
+handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
+{
+    const char *type = NULL;
+
+    CRM_CHECK(msg != NULL, return I_NULL);
+
+    type = crm_element_value(msg, F_CRM_MSG_TYPE);
+    if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) {
+        return handle_request(msg, cause);
+
+    } else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) {
+        handle_response(msg);
+        return I_NULL;
+    }
+
+    crm_err("Unknown message type: %s", type);
+    return I_NULL;
+}
+
+static enum crmd_fsa_input
+handle_failcount_op(xmlNode * stored_msg)
+{
+    const char *rsc = NULL;
+    const char *uname = NULL;
+    const char *op = NULL;
+    char *interval_spec = NULL;
+    guint interval_ms = 0;
+    gboolean is_remote_node = FALSE;
+    xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
+
+    if (xml_op) {
+        xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
+        xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
+
+        if (xml_rsc) {
+            rsc = ID(xml_rsc);
+        }
+        if (xml_attrs) {
+            op = crm_element_value(xml_attrs,
+                                   CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
+            crm_element_value_ms(xml_attrs,
+                                 CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL,
+                                 &interval_ms);
+        }
+    }
+    uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
+
+    if ((rsc == NULL) || (uname == NULL)) {
+        crm_log_xml_warn(stored_msg, "invalid failcount op");
+        return I_NULL;
+    }
+
+    if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
+        is_remote_node = TRUE;
+    }
+
+    crm_debug("Clearing failures for %s-interval %s on %s "
+              "from attribute manager, CIB, and executor state",
+              pcmk__readable_interval(interval_ms), rsc, uname);
+
+    if (interval_ms) {
+        interval_spec = crm_strdup_printf("%ums", interval_ms);
+    }
+    update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
+    free(interval_spec);
+
+    controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
+
+    lrm_clear_last_failure(rsc, uname, op, interval_ms);
+
+    return I_NULL;
+}
+
+static enum crmd_fsa_input
+handle_lrm_delete(xmlNode *stored_msg)
+{
+    const char *mode = NULL;
+    xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
+
+    CRM_CHECK(msg_data != NULL, return I_NULL);
+
+    /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
+     * relay the operation to the affected node, which will unregister the
+     * resource from the local executor, clear the resource's history from the
+     * CIB, and do some bookkeeping in the controller.
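+     * (In that default mode, the request is simply re-addressed to the
+     * executor subsystem and re-routed via the I_ROUTER result below.)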
+     *
+     * However, if the affected node is offline, the client will specify
+     * mode="cib" which means the controller receiving the operation should
+     * clear the resource's history from the CIB and nothing else. This is used
+     * to clear shutdown locks.
+     */
+    mode = crm_element_value(msg_data, PCMK__XA_MODE);
+    if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+        // Relay to affected node
+        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+        return I_ROUTER;
+
+    } else {
+        // Delete CIB history locally (compare with do_lrm_delete())
+        const char *from_sys = NULL;
+        const char *user_name = NULL;
+        const char *rsc_id = NULL;
+        const char *node = NULL;
+        xmlNode *rsc_xml = NULL;
+        int rc = pcmk_rc_ok;
+
+        rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
+        CRM_CHECK(rsc_xml != NULL, return I_NULL);
+
+        rsc_id = ID(rsc_xml);
+        from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
+        node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
+        user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
+        crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
+                  "(clearing CIB resource history only)", rsc_id, node,
+                  (user_name? " for user " : ""), (user_name? user_name : ""));
+        rc = controld_delete_resource_history(rsc_id, node, user_name,
+                                              cib_dryrun|cib_sync_call);
+        if (rc == pcmk_rc_ok) {
+            rc = controld_delete_resource_history(rsc_id, node, user_name,
+                                                  crmd_cib_smart_opt());
+        }
+
+        /* Notify the client and the tengine (the tengine is notified only
+         * when mode is "cib" for CRM_OP_LRM_DELETE)
+         */
+        if (from_sys) {
+            lrmd_event_data_t *op = NULL;
+            const char *from_host = crm_element_value(stored_msg,
+                                                      F_CRM_HOST_FROM);
+            const char *transition;
+
+            if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
+                transition = crm_element_value(msg_data,
+                                               XML_ATTR_TRANSITION_KEY);
+            } else {
+                transition = crm_element_value(stored_msg,
+                                               XML_ATTR_TRANSITION_KEY);
+            }
+
+            crm_info("Notifying %s on %s that %s was%s deleted",
+                     from_sys, (from_host? from_host : "local node"), rsc_id,
+                     ((rc == pcmk_rc_ok)? "" : " not"));
+            op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
+            op->type = lrmd_event_exec_complete;
+            op->user_data = strdup(transition? transition : FAKE_TE_ID);
+            op->params = pcmk__strkey_table(free, free);
+            g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
+                                strdup(CRM_FEATURE_SET));
+            controld_rc2event(op, rc);
+            controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+            lrmd_free_event(op);
+            controld_trigger_delete_refresh(from_sys, rsc_id);
+        }
+        return I_NULL;
+    }
+}
+
+/*!
+ * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
+ *
+ * \param[in] msg  Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_remote_state(const xmlNode *msg)
+{
+    const char *conn_host = NULL;
+    const char *remote_uname = ID(msg);
+    crm_node_t *remote_peer;
+    bool remote_is_up = false;
+    int rc = pcmk_rc_ok;
+
+    rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up);
+
+    CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
+
+    remote_peer = crm_remote_peer_get(remote_uname);
+    CRM_CHECK(remote_peer, return I_NULL);
+
+    pcmk__update_peer_state(__func__, remote_peer,
+                            remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
+                            0);
+
+    conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
+    if (conn_host) {
+        pcmk__str_update(&remote_peer->conn_host, conn_host);
+    } else if (remote_peer->conn_host) {
+        free(remote_peer->conn_host);
+        remote_peer->conn_host = NULL;
+    }
+
+    return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_PING message + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_ping(const xmlNode *msg) +{ + const char *value = NULL; + xmlNode *ping = NULL; + xmlNode *reply = NULL; + + // Build reply + + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + + // Add controller state + value = fsa_state2string(controld_globals.fsa_state); + crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value); + crm_notice("Current ping state: %s", value); // CTS needs this + + // Add controller health + // @TODO maybe do some checks to determine meaningful status + crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + + // Send reply + reply = create_reply(msg, ping); + free_xml(ping); + if (reply != NULL) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +/*! + * \brief Handle a PCMK__CONTROLD_CMD_NODES message + * + * \param[in] request Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_node_list(const xmlNode *request) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + xmlNode *reply = NULL; + xmlNode *reply_data = NULL; + + // Create message data for reply + reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES); + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { + xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE); + + crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t + crm_xml_add(xml, XML_ATTR_UNAME, node->uname); + crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state); + } + + // Create and send reply + reply = create_reply(request, reply_data); + free_xml(reply_data); + if (reply) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +/*! 
+ * \brief Handle a CRM_OP_NODE_INFO request + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_node_info_request(const xmlNode *msg) +{ + const char *value = NULL; + crm_node_t *node = NULL; + int node_id = 0; + xmlNode *reply = NULL; + xmlNode *reply_data = NULL; + + // Build reply + + reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE); + crm_xml_add(reply_data, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD); + + // Add whether current partition has quorum + pcmk__xe_set_bool_attr(reply_data, XML_ATTR_HAVE_QUORUM, + pcmk_is_set(controld_globals.flags, + controld_has_quorum)); + + // Check whether client requested node info by ID and/or name + crm_element_value_int(msg, XML_ATTR_ID, &node_id); + if (node_id < 0) { + node_id = 0; + } + value = crm_element_value(msg, XML_ATTR_UNAME); + + // Default to local node if none given + if ((node_id == 0) && (value == NULL)) { + value = controld_globals.our_nodename; + } + + node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY); + if (node) { + crm_xml_add(reply_data, XML_ATTR_ID, node->uuid); + crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname); + crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state); + pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE, + pcmk_is_set(node->flags, crm_remote_node)); + } + + // Send reply + reply = create_reply(msg, reply_data); + free_xml(reply_data); + if (reply != NULL) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +static void +verify_feature_set(xmlNode *msg) +{ + const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION); + + if (dc_version == NULL) { + /* All we really know is that the DC feature set is older than 3.1.0, + * but that's also all that really matters. + */ + dc_version = "3.0.14"; + } + + if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { + crm_trace("Local feature set (%s) is compatible with DC's (%s)", + CRM_FEATURE_SET, dc_version); + } else { + crm_err("Local feature set (%s) is incompatible with DC's (%s)", + CRM_FEATURE_SET, dc_version); + + // Nothing is likely to improve without administrator involvement + controld_set_fsa_input_flags(R_STAYDOWN); + crmd_exit(CRM_EX_FATAL); + } +} + +// DC gets own shutdown all-clear +static enum crmd_fsa_input +handle_shutdown_self_ack(xmlNode *stored_msg) +{ + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + // The expected case -- we initiated own shutdown sequence + crm_info("Shutting down controller"); + return I_STOP; + } + + if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) { + // Must be logic error -- DC confirming its own unrequested shutdown + crm_err("Shutting down controller immediately due to " + "unexpected shutdown confirmation"); + return I_TERMINATE; + } + + if (controld_globals.fsa_state != S_STOPPING) { + // Shouldn't happen -- non-DC confirming unrequested shutdown + crm_err("Starting new DC election because %s is " + "confirming shutdown we did not request", + (host_from? host_from : "another node")); + return I_ELECTION; + } + + // Shouldn't happen, but we are already stopping anyway + crm_debug("Ignoring unexpected shutdown confirmation from %s", + (host_from? 
host_from : "another node")); + return I_NULL; +} + +// Non-DC gets shutdown all-clear from DC +static enum crmd_fsa_input +handle_shutdown_ack(xmlNode *stored_msg) +{ + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (host_from == NULL) { + crm_warn("Ignoring shutdown request without origin specified"); + return I_NULL; + } + + if (pcmk__str_eq(host_from, controld_globals.dc_name, + pcmk__str_null_matches|pcmk__str_casei)) { + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("Shutting down controller after confirmation from %s", + host_from); + } else { + crm_err("Shutting down controller after unexpected " + "shutdown request from %s", host_from); + controld_set_fsa_input_flags(R_STAYDOWN); + } + return I_STOP; + } + + crm_warn("Ignoring shutdown request from %s because DC is %s", + host_from, controld_globals.dc_name); + return I_NULL; +} + +static enum crmd_fsa_input +handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) +{ + xmlNode *msg = NULL; + const char *op = crm_element_value(stored_msg, F_CRM_TASK); + + /* Optimize this for the DC - it has the most to do */ + + if (op == NULL) { + crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]"); + return I_NULL; + } + + if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + crm_node_t *node = pcmk__search_cluster_node_cache(0, from); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + if(AM_I_DC == FALSE) { + return I_NULL; /* Done */ + } + } + + /*========== DC-Only Actions ==========*/ + if (AM_I_DC) { + if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { + return I_NODE_JOIN; + + } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { + return I_JOIN_REQUEST; + + } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { + return I_JOIN_RESULT; + + } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { + return handle_shutdown_self_ack(stored_msg); + + } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + // Another controller wants to shut down its node + return handle_shutdown_request(stored_msg); + } + } + + /*========== common actions ==========*/ + if (strcmp(op, CRM_OP_NOVOTE) == 0) { + ha_msg_input_t fsa_input; + + fsa_input.msg = stored_msg; + register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, + A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, + __func__); + + } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { + /* a remote connection host is letting us know the node state */ + return handle_remote_state(stored_msg); + + } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { + throttle_update(stored_msg); + if (AM_I_DC && (controld_globals.transition_graph != NULL) + && !controld_globals.transition_graph->complete) { + + crm_debug("The throttle changed. Trigger a graph."); + trigger_graph(); + } + return I_NULL; + + } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { + return handle_failcount_op(stored_msg); + + } else if (strcmp(op, CRM_OP_VOTE) == 0) { + /* count the vote and decide what to do after that */ + ha_msg_input_t fsa_input; + + fsa_input.msg = stored_msg; + register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, + A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, + __func__); + + /* Sometimes we _must_ go into S_ELECTION */ + if (controld_globals.fsa_state == S_HALT) { + crm_debug("Forcing an election from S_HALT"); + return I_ELECTION; +#if 0 + } else if (AM_I_DC) { + /* This is the old way of doing things but what is gained? 
*/ + return I_ELECTION; +#endif + } + + } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { + verify_feature_set(stored_msg); + crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); + return I_JOIN_OFFER; + + } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { + crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); + return I_JOIN_RESULT; + + } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { + return handle_lrm_delete(stored_msg); + + } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) + || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT + || (strcmp(op, CRM_OP_REPROBE) == 0)) { + + crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); + return I_ROUTER; + + } else if (strcmp(op, CRM_OP_NOOP) == 0) { + return I_NULL; + + } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { + + crm_shutdown(SIGTERM); + /*return I_SHUTDOWN; */ + return I_NULL; + + } else if (strcmp(op, CRM_OP_PING) == 0) { + return handle_ping(stored_msg); + + } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { + return handle_node_info_request(stored_msg); + + } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { + int id = 0; + const char *name = NULL; + + crm_element_value_int(stored_msg, XML_ATTR_ID, &id); + name = crm_element_value(stored_msg, XML_ATTR_UNAME); + + if(cause == C_IPC_MESSAGE) { + msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { + crm_err("Could not instruct peers to remove references to node %s/%u", name, id); + } else { + crm_notice("Instructing peers to remove references to node %s/%u", name, id); + } + free_xml(msg); + + } else { + reap_crm_member(id, name); + + /* If we're forgetting this node, also forget any failures to fence + * it, so we don't carry that over to any node added later with the + * same name. + */ + st_fail_count_reset(name); + } + + } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { + xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA); + + remote_ra_process_maintenance_nodes(xml); + + } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { + return handle_node_list(stored_msg); + + /*========== (NOT_DC)-Only Actions ==========*/ + } else if (!AM_I_DC) { + + if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { + return handle_shutdown_ack(stored_msg); + } + + } else { + crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? 
"the DC" : "non-DC node"); + crm_log_xml_err(stored_msg, "Unexpected"); + } + + return I_NULL; +} + +static void +handle_response(xmlNode *stored_msg) +{ + const char *op = crm_element_value(stored_msg, F_CRM_TASK); + + if (op == NULL) { + crm_log_xml_err(stored_msg, "Bad message"); + + } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { + // Check whether scheduler answer been superseded by subsequent request + const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); + + if (msg_ref == NULL) { + crm_err("%s - Ignoring calculation with no reference", op); + + } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, + pcmk__str_none)) { + ha_msg_input_t fsa_input; + + controld_stop_sched_timer(); + fsa_input.msg = stored_msg; + register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); + + } else { + crm_info("%s calculation %s is obsolete", op, msg_ref); + } + + } else if (strcmp(op, CRM_OP_VOTE) == 0 + || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { + + } else { + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + crm_err("Unexpected response (op=%s, src=%s) sent to the %s", + op, host_from, AM_I_DC ? "DC" : "controller"); + } +} + +static enum crmd_fsa_input +handle_shutdown_request(xmlNode * stored_msg) +{ + /* handle here to avoid potential version issues + * where the shutdown message/procedure may have + * been changed in later versions. + * + * This way the DC is always in control of the shutdown + */ + + char *now_s = NULL; + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (host_from == NULL) { + /* we're shutting down and the DC */ + host_from = controld_globals.our_nodename; + } + + crm_info("Creating shutdown request for %s (state=%s)", host_from, + fsa_state2string(controld_globals.fsa_state)); + crm_log_xml_trace(stored_msg, "message"); + + now_s = pcmk__ttoa(time(NULL)); + update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); + free(now_s); + + /* will be picked up by the TE as long as its running */ + return I_NULL; +} + +static void +send_msg_via_ipc(xmlNode * msg, const char *sys) +{ + pcmk__client_t *client_channel = NULL; + + CRM_CHECK(sys != NULL, return); + + client_channel = pcmk__find_client_by_id(sys); + + if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { + crm_xml_add(msg, F_CRM_HOST_FROM, controld_globals.our_nodename); + } + + if (client_channel != NULL) { + /* Transient clients such as crmadmin */ + pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); + + } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + xmlNode *data = get_message_xml(msg, F_CRM_DATA); + + process_te_message(msg, data); + + } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) { + fsa_data_t fsa_data; + ha_msg_input_t fsa_input; + + fsa_input.msg = msg; + fsa_input.xml = get_message_xml(msg, F_CRM_DATA); + + fsa_data.id = 0; + fsa_data.actions = 0; + fsa_data.data = &fsa_input; + fsa_data.fsa_input = I_MESSAGE; + fsa_data.fsa_cause = C_IPC_MESSAGE; + fsa_data.origin = __func__; + fsa_data.data_type = fsa_dt_ha_msg; + + do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state, + I_MESSAGE, &fsa_data); + + } else if (crmd_is_proxy_session(sys)) { + crmd_proxy_send(sys, msg); + + } else { + crm_info("Received invalid request: unknown subsystem '%s'", sys); + } +} + +void +delete_ha_msg_input(ha_msg_input_t * orig) +{ + if (orig == NULL) { + return; + } + free_xml(orig->msg); + free(orig); +} + +/*! 
+ * \internal + * \brief Notify the cluster of a remote node state change + * + * \param[in] node_name Node's name + * \param[in] node_up true if node is up, false if down + */ +void +broadcast_remote_state_message(const char *node_name, bool node_up) +{ + xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL, + CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + crm_info("Notifying cluster of Pacemaker Remote node %s %s", + node_name, node_up? "coming up" : "going down"); + + crm_xml_add(msg, XML_ATTR_ID, node_name); + pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up); + + if (node_up) { + crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename); + } + + send_cluster_message(NULL, crm_msg_crmd, msg, TRUE); + free_xml(msg); +} + diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h new file mode 100644 index 0000000..4108961 --- /dev/null +++ b/daemons/controld/controld_messages.h @@ -0,0 +1,86 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef XML_CRM_MESSAGES__H +# define XML_CRM_MESSAGES__H + +# include <crm/crm.h> +# include <crm/common/ipc_internal.h> +# include <crm/common/xml.h> +# include <crm/cluster/internal.h> +# include <controld_fsa.h> + +typedef struct ha_msg_input_s { + xmlNode *msg; + xmlNode *xml; + +} ha_msg_input_t; + +extern void delete_ha_msg_input(ha_msg_input_t * orig); + +extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, + const char *caller); + +# define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __func__) + +extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, + fsa_data_t * cur_data, void *new_data, const char *raised_from); + +#define register_fsa_error(cause, input, new_data) \ + register_fsa_error_adv(cause, input, msg_data, new_data, __func__) + +void register_fsa_input_adv(enum crmd_fsa_cause cause, + enum crmd_fsa_input input, void *data, + uint64_t with_actions, gboolean prepend, + const char *raised_from); + +extern void fsa_dump_queue(int log_level); +extern void route_message(enum crmd_fsa_cause cause, xmlNode * input); + +# define crmd_fsa_stall(suppress) do { \ + if(suppress == FALSE && msg_data != NULL) { \ + register_fsa_input_adv( \ + ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT, \ + ((fsa_data_t*)msg_data)->data, action, TRUE, __func__); \ + } else { \ + register_fsa_input_adv( \ + C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \ + NULL, action, TRUE, __func__); \ + } \ + } while(0) + +#define register_fsa_input(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) + +#define register_fsa_input_before(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __func__) + +#define register_fsa_input_later(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) + +void delete_fsa_input(fsa_data_t * fsa_data); + +fsa_data_t *get_message(void); + +extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally); + +gboolean crmd_is_proxy_session(const char *session); +void crmd_proxy_send(const char *session, xmlNode *msg); + +bool controld_authorize_ipc_message(const xmlNode *client_msg, + pcmk__client_t *curr_client, + const char *proxy_session); + +extern 
gboolean send_request(xmlNode * msg, char **msg_reference); + +extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); + +void broadcast_remote_state_message(const char *node_name, bool node_up); + +#endif diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c new file mode 100644 index 0000000..240a978 --- /dev/null +++ b/daemons/controld/controld_metadata.c @@ -0,0 +1,320 @@ +/* + * Copyright 2017-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdio.h> +#include <glib.h> +#include <regex.h> + +#include <crm/crm.h> +#include <crm/lrmd.h> + +#include <pacemaker-controld.h> + +static void +ra_param_free(void *param) +{ + if (param) { + struct ra_param_s *p = (struct ra_param_s *) param; + + if (p->rap_name) { + free(p->rap_name); + } + free(param); + } +} + +static void +metadata_free(void *metadata) +{ + if (metadata) { + struct ra_metadata_s *md = (struct ra_metadata_s *) metadata; + + g_list_free_full(md->ra_params, ra_param_free); + free(metadata); + } +} + +GHashTable * +metadata_cache_new(void) +{ + return pcmk__strkey_table(free, metadata_free); +} + +void +metadata_cache_free(GHashTable *mdc) +{ + if (mdc) { + crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc)); + g_hash_table_destroy(mdc); + } +} + +void +metadata_cache_reset(GHashTable *mdc) +{ + if (mdc) { + crm_trace("Resetting metadata cache with %d members", + g_hash_table_size(mdc)); + g_hash_table_remove_all(mdc); + } +} + +static struct ra_param_s * +ra_param_from_xml(xmlNode *param_xml) +{ + const char *param_name = crm_element_value(param_xml, "name"); + struct ra_param_s *p; + + p = calloc(1, sizeof(struct ra_param_s)); + if (p == NULL) { + return NULL; + } + + p->rap_name = strdup(param_name); + if (p->rap_name == NULL) { + free(p); + return NULL; + } + + if (pcmk__xe_attr_is_true(param_xml, "reloadable")) { + controld_set_ra_param_flags(p, ra_param_reloadable); + } + + if (pcmk__xe_attr_is_true(param_xml, "unique")) { + controld_set_ra_param_flags(p, ra_param_unique); + } + + if (pcmk__xe_attr_is_true(param_xml, "private")) { + controld_set_ra_param_flags(p, ra_param_private); + } + return p; +} + +static void +log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version) +{ + if (pcmk__str_empty(ra_ocf_version)) { + crm_warn("%s does not advertise OCF version supported", ra_key); + + } else if (compare_version(ra_ocf_version, "2") >= 0) { + crm_warn("%s supports OCF version %s (this Pacemaker version supports " + PCMK_OCF_VERSION " and might not work properly with agent)", + ra_key, ra_ocf_version); + + } else if (compare_version(ra_ocf_version, PCMK_OCF_VERSION) > 0) { + crm_info("%s supports OCF version %s (this Pacemaker version supports " + PCMK_OCF_VERSION " and might not use all agent features)", + ra_key, ra_ocf_version); + + } else { + crm_debug("%s supports OCF version %s", ra_key, ra_ocf_version); + } +} + +struct ra_metadata_s * +controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc, + const char *metadata_str) +{ + char *key = NULL; + const char *reason = NULL; + xmlNode *metadata = NULL; + xmlNode *match = NULL; + struct ra_metadata_s *md = NULL; + bool any_private_params = false; + bool ocf1_1 = false; + + CRM_CHECK(mdc && rsc && metadata_str, return NULL); + + key = 
crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); + if (!key) { + reason = "Invalid resource agent standard or type"; + goto err; + } + + metadata = string2xml(metadata_str); + if (!metadata) { + reason = "Metadata is not valid XML"; + goto err; + } + + md = calloc(1, sizeof(struct ra_metadata_s)); + if (md == NULL) { + reason = "Could not allocate memory"; + goto err; + } + + if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { + xmlChar *content = NULL; + xmlNode *version_element = first_named_child(metadata, "version"); + + if (version_element != NULL) { + content = xmlNodeGetContent(version_element); + } + log_ra_ocf_version(key, (const char *) content); + if (content != NULL) { + ocf1_1 = (compare_version((const char *) content, "1.1") >= 0); + xmlFree(content); + } + } + + // Check supported actions + match = first_named_child(metadata, "actions"); + for (match = first_named_child(match, "action"); match != NULL; + match = crm_next_same_xml(match)) { + + const char *action_name = crm_element_value(match, "name"); + + if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT, + pcmk__str_none)) { + if (ocf1_1) { + controld_set_ra_flags(md, key, ra_supports_reload_agent); + } else { + crm_notice("reload-agent action will not be used with %s " + "because it does not support OCF 1.1 or later", key); + } + + } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD, + pcmk__str_casei)) { + controld_set_ra_flags(md, key, ra_supports_legacy_reload); + } + } + + // Build a parameter list + match = first_named_child(metadata, "parameters"); + for (match = first_named_child(match, "parameter"); match != NULL; + match = crm_next_same_xml(match)) { + + const char *param_name = crm_element_value(match, "name"); + + if (param_name == NULL) { + crm_warn("Metadata for %s:%s:%s has parameter without a name", + rsc->standard, rsc->provider, rsc->type); + } else { + struct ra_param_s *p = ra_param_from_xml(match); + + if (p == NULL) { + reason = "Could not allocate memory"; + goto err; + } + if (pcmk_is_set(p->rap_flags, ra_param_private)) { + any_private_params = true; + } + md->ra_params = g_list_prepend(md->ra_params, p); + } + } + + /* Newer resource agents support the "private" parameter attribute to + * indicate sensitive parameters. For backward compatibility with older + * agents, implicitly treat a few common names as private when the agent + * doesn't specify any explicitly. + */ + if (!any_private_params) { + for (GList *iter = md->ra_params; iter != NULL; iter = iter->next) { + struct ra_param_s *p = iter->data; + + if (pcmk__str_any_of(p->rap_name, "password", "passwd", "user", + NULL)) { + controld_set_ra_param_flags(p, ra_param_private); + } + } + } + + g_hash_table_replace(mdc, key, md); + free_xml(metadata); + return md; + +err: + crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s", + rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), + pcmk__s(rsc->provider, ""), rsc->type, reason); + free(key); + free_xml(metadata); + metadata_free(md); + return NULL; +} + +/*! 
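+ * For reference, a minimal sketch of agent meta-data as parsed by
+ * controld_cache_metadata() above (hypothetical agent; only the elements
+ * the parser inspects are shown):
+ *
+ *   <resource-agent name="example">
+ *     <version>1.1</version>
+ *     <parameters>
+ *       <parameter name="passwd" unique="0" reloadable="1"/>
+ *     </parameters>
+ *     <actions>
+ *       <action name="reload-agent" timeout="20s"/>
+ *     </actions>
+ *   </resource-agent>
+ *
+ * Given this input, the cache entry would have ra_supports_reload_agent
+ * set (the advertised OCF version is at least 1.1) and a single parameter
+ * flagged as reloadable.
+ */
+
+/*!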
+ * \internal + * \brief Get meta-data for a resource + * + * \param[in,out] lrm_state Use meta-data cache from this executor connection + * \param[in] rsc Resource to get meta-data for + * \param[in] source Allowed meta-data sources (bitmask of + * enum controld_metadata_source_e values) + * + * \return Meta-data cache entry for given resource, or NULL if not available + */ +struct ra_metadata_s * +controld_get_rsc_metadata(lrm_state_t *lrm_state, const lrmd_rsc_info_t *rsc, + uint32_t source) +{ + struct ra_metadata_s *metadata = NULL; + char *metadata_str = NULL; + char *key = NULL; + int rc = pcmk_ok; + + CRM_CHECK((lrm_state != NULL) && (rsc != NULL), return NULL); + + if (pcmk_is_set(source, controld_metadata_from_cache)) { + key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); + if (key != NULL) { + metadata = g_hash_table_lookup(lrm_state->metadata_cache, key); + free(key); + } + if (metadata != NULL) { + crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type); + return metadata; + } + } + + if (!pcmk_is_set(source, controld_metadata_from_agent)) { + return NULL; + } + + /* For most actions, metadata was cached asynchronously before action + * execution (via metadata_complete()). + * + * However if that failed, and for other actions, retrieve the metadata now + * via a local, synchronous, direct execution of the agent. + * + * This has multiple issues, which is why this is just a fallback: the + * executor should execute agents, not the controller; metadata for + * Pacemaker Remote nodes should be collected on those nodes, not locally; + * the metadata call shouldn't eat into the timeout of the real action being + * performed; and the synchronous call blocks the controller (which also + * means that if the metadata action tries to contact the controller, + * everything will hang until the timeout). + */ + crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type); + rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, + rsc->type, &metadata_str, 0); + if (rc != pcmk_ok) { + crm_warn("Failed to get metadata for %s (%s%s%s:%s): %s", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type, pcmk_strerror(rc)); + return NULL; + } + + metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc, + metadata_str); + free(metadata_str); + return metadata; +} diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h new file mode 100644 index 0000000..12ea327 --- /dev/null +++ b/daemons/controld/controld_metadata.h @@ -0,0 +1,96 @@ +/* + * Copyright 2017-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#ifndef CRMD_METADATA_H +#define CRMD_METADATA_H + +#include <stdint.h> // uint32_t +#include <glib.h> // GList, GHashTable +#include "controld_lrm.h" // lrm_state_t, lrm_rsc_info_t + +/* + * @COMPAT pre-OCF-1.1 resource agents + * + * Pacemaker previously used the "reload" action to reload agent parameters, + * but most agents used it to reload the service configuration. 
Pacemaker also
+ * misused the OCF 1.0 "unique" parameter attribute to indicate reloadability.
+ *
+ * OCF 1.1 created the "reload-agent" action and "reloadable" parameter
+ * attribute for the Pacemaker usage.
+ *
+ * Pacemaker now supports the OCF 1.1 usage. The old usage is now deprecated,
+ * but will be supported if the agent does not claim OCF 1.1 or later
+ * compliance and does not advertise the reload-agent action.
+ */
+enum ra_flags_e {
+ ra_supports_legacy_reload = (1 << 0),
+ ra_supports_reload_agent = (1 << 1),
+};
+
+enum ra_param_flags_e {
+ ra_param_unique = (1 << 0),
+ ra_param_private = (1 << 1),
+ ra_param_reloadable = (1 << 2),
+};
+
+// Allowed sources of resource agent meta-data when requesting it
+enum controld_metadata_source_e {
+ controld_metadata_from_cache = (1 << 0),
+ controld_metadata_from_agent = (1 << 1),
+};
+
+struct ra_param_s {
+ char *rap_name;
+ uint32_t rap_flags; // bitmask of ra_param_flags_e
+};
+
+struct ra_metadata_s {
+ GList *ra_params; // ra_param_s
+ uint32_t ra_flags; // bitmask of ra_flags_e
+};
+
+#define controld_set_ra_flags(ra_md, ra_key, flags_to_set) do { \
+ (ra_md)->ra_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent", ra_key, \
+ (ra_md)->ra_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define controld_set_ra_param_flags(ra_param, flags_to_set) do { \
+ (ra_param)->rap_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Resource agent parameter", (ra_param)->rap_name, \
+ (ra_param)->rap_flags, (flags_to_set), #flags_to_set); \
+ } while (0)
+
+GHashTable *metadata_cache_new(void);
+void metadata_cache_free(GHashTable *mdc);
+void metadata_cache_reset(GHashTable *mdc);
+
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
+ const lrmd_rsc_info_t *rsc,
+ const char *metadata_str);
+struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
+ const lrmd_rsc_info_t *rsc,
+ uint32_t source);
+
+static inline const char *
+ra_param_flag2text(enum ra_param_flags_e flag)
+{
+ switch (flag) {
+ case ra_param_reloadable:
+ return "reloadable";
+ case ra_param_unique:
+ return "unique";
+ case ra_param_private:
+ return "private";
+ default:
+ return "unknown";
+ }
+}
+
+#endif
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
new file mode 100644
index 0000000..f24b755
--- /dev/null
+++ b/daemons/controld/controld_remote_ra.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml_internal.h> +#include <crm/lrmd.h> +#include <crm/lrmd_internal.h> +#include <crm/services.h> + +#include <pacemaker-controld.h> + +#define REMOTE_LRMD_RA "remote" + +/* The max start timeout before cmd retry */ +#define MAX_START_TIMEOUT_MS 10000 + +#define cmd_set_flags(cmd, flags_to_set) do { \ + (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ + "Remote command", (cmd)->rsc_id, (cmd)->status, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define cmd_clear_flags(cmd, flags_to_clear) do { \ + (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ + "Remote command", (cmd)->rsc_id, (cmd)->status, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +enum remote_cmd_status { + cmd_reported_success = (1 << 0), + cmd_cancel = (1 << 1), +}; + +typedef struct remote_ra_cmd_s { + /*! the local node the cmd is issued from */ + char *owner; + /*! the remote node the cmd is executed on */ + char *rsc_id; + /*! the action to execute */ + char *action; + /*! some string the client wants us to give it back */ + char *userdata; + /*! start delay in ms */ + int start_delay; + /*! timer id used for start delay. */ + int delay_id; + /*! timeout in ms for cmd */ + int timeout; + int remaining_timeout; + /*! recurring interval in ms */ + guint interval_ms; + /*! interval timer id */ + int interval_id; + int monitor_timeout_id; + int takeover_timeout_id; + /*! action parameters */ + lrmd_key_value_t *params; + pcmk__action_result_t result; + int call_id; + time_t start_time; + uint32_t status; +} remote_ra_cmd_t; + +#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \ + lrm_state_t *lrm = (lrm_state); \ + remote_ra_data_t *ra = lrm->remote_ra_data; \ + ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ + lrm->node_name, ra->status, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \ + lrm_state_t *lrm = (lrm_state); \ + remote_ra_data_t *ra = lrm->remote_ra_data; \ + ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ + lrm->node_name, ra->status, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +enum remote_status { + expect_takeover = (1 << 0), + takeover_complete = (1 << 1), + remote_active = (1 << 2), + /* Maintenance mode is difficult to determine from the controller's context, + * so we have it signalled back with the transition from the scheduler. + */ + remote_in_maint = (1 << 3), + /* Similar for whether we are controlling a guest node or remote node. + * Fortunately there is a meta-attribute in the transition already and + * as the situation doesn't change over time we can use the + * resource start for noting down the information for later use when + * the attributes aren't at hand. 
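+ * (Concretely, handle_remote_ra_start() below sets this flag when the
+ * start parameters include the container meta-attribute, i.e. a key of
+ * CRM_META "_" XML_RSC_ATTR_CONTAINER, which the scheduler adds for
+ * guest node connections.)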
+ */ + controlling_guest = (1 << 4), +}; + +typedef struct remote_ra_data_s { + crm_trigger_t *work; + remote_ra_cmd_t *cur_cmd; + GList *cmds; + GList *recurring_cmds; + uint32_t status; +} remote_ra_data_t; + +static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); +static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); +static GList *fail_all_monitor_cmds(GList * list); + +static void +free_cmd(gpointer user_data) +{ + remote_ra_cmd_t *cmd = user_data; + + if (!cmd) { + return; + } + if (cmd->delay_id) { + g_source_remove(cmd->delay_id); + } + if (cmd->interval_id) { + g_source_remove(cmd->interval_id); + } + if (cmd->monitor_timeout_id) { + g_source_remove(cmd->monitor_timeout_id); + } + if (cmd->takeover_timeout_id) { + g_source_remove(cmd->takeover_timeout_id); + } + free(cmd->owner); + free(cmd->rsc_id); + free(cmd->action); + free(cmd->userdata); + pcmk__reset_result(&(cmd->result)); + lrmd_key_value_freeall(cmd->params); + free(cmd); +} + +static int +generate_callid(void) +{ + static int remote_ra_callid = 0; + + remote_ra_callid++; + if (remote_ra_callid <= 0) { + remote_ra_callid = 1; + } + + return remote_ra_callid; +} + +static gboolean +recurring_helper(gpointer data) +{ + remote_ra_cmd_t *cmd = data; + lrm_state_t *connection_rsc = NULL; + + cmd->interval_id = 0; + connection_rsc = lrm_state_find(cmd->rsc_id); + if (connection_rsc && connection_rsc->remote_ra_data) { + remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; + + ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); + + ra_data->cmds = g_list_append(ra_data->cmds, cmd); + mainloop_set_trigger(ra_data->work); + } + return FALSE; +} + +static gboolean +start_delay_helper(gpointer data) +{ + remote_ra_cmd_t *cmd = data; + lrm_state_t *connection_rsc = NULL; + + cmd->delay_id = 0; + connection_rsc = lrm_state_find(cmd->rsc_id); + if (connection_rsc && connection_rsc->remote_ra_data) { + remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; + + mainloop_set_trigger(ra_data->work); + } + return FALSE; +} + +static bool +should_purge_attributes(crm_node_t *node) +{ + bool purge = true; + crm_node_t *conn_node = NULL; + lrm_state_t *connection_rsc = NULL; + + if (!node->conn_host) { + return purge; + } + + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. + */ + conn_node = crm_get_peer(0, node->conn_host); + if (conn_node == NULL) { + return purge; + } + + /* Check the uptime of connection_rsc. If it hasn't been running long + * enough, set purge=true. "Long enough" means it started running earlier + * than the timestamp when we noticed it went away in the first place. + */ + connection_rsc = lrm_state_find(node->uname); + + if (connection_rsc != NULL) { + lrmd_t *lrm = connection_rsc->conn; + time_t uptime = lrmd__uptime(lrm); + time_t now = time(NULL); + + /* Add 20s of fuzziness to give corosync a while to notice the remote + * host is gone. On various error conditions (failure to get uptime, + * peer_lost isn't set) we default to purging. 
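+ *
+ * Worked example with hypothetical times: if the connection has been up
+ * for 100s and the peer was lost 60s ago, 100 + 20 >= 60 holds, so the
+ * connection predates the loss and the attributes are kept (purge=false).
+ * If the connection has been up for only 30s, 30 + 20 >= 60 fails, and
+ * we default to purging.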
+ */ + if (uptime > 0 && + conn_node->peer_lost > 0 && + uptime + 20 >= now - conn_node->peer_lost) { + purge = false; + } + } + + return purge; +} + +static enum controld_section_e +section_to_delete(bool purge) +{ + if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + if (purge) { + return controld_section_all_unlocked; + } else { + return controld_section_lrm_unlocked; + } + } else { + if (purge) { + return controld_section_all; + } else { + return controld_section_lrm; + } + } +} + +static void +purge_remote_node_attrs(int call_opt, crm_node_t *node) +{ + bool purge = should_purge_attributes(node); + enum controld_section_e section = section_to_delete(purge); + + /* Purge node from attrd's memory */ + if (purge) { + update_attrd_remote_node_removed(node->uname, NULL); + } + + controld_delete_node_state(node->uname, section, call_opt); +} + +/*! + * \internal + * \brief Handle cluster communication related to pacemaker_remote node joining + * + * \param[in] node_name Name of newly integrated pacemaker_remote node + */ +static void +remote_node_up(const char *node_name) +{ + int call_opt; + xmlNode *update, *state; + crm_node_t *node; + + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing Pacemaker Remote node %s", node_name); + + call_opt = crmd_cib_smart_opt(); + + /* Delete node's probe_complete attribute. This serves two purposes: + * + * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it + * - deleting it (or any attribute for that matter) here ensures the + * attribute manager learns the node is remote + */ + update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); + + /* Ensure node is in the remote peer cache with member status */ + node = crm_remote_peer_get(node_name); + CRM_CHECK(node != NULL, return); + + purge_remote_node_attrs(call_opt, node); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + + /* pacemaker_remote nodes don't participate in the membership layer, + * so cluster nodes don't automatically get notified when they come and go. + * We send a cluster message to the DC, and update the CIB node state entry, + * so the DC will get it sooner (via message) or later (via CIB refresh), + * and any other interested parties can query the CIB. + */ + broadcast_remote_state_message(node_name, true); + + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + state = create_node_state_update(node, node_update_cluster, update, + __func__); + + /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever + * needs to be fenced, this flag will allow various actions to determine + * whether the fencing has happened yet. + */ + crm_xml_add(state, XML_NODE_IS_FENCED, "0"); + + /* TODO: If the remote connection drops, and this (async) CIB update either + * failed or has not yet completed, later actions could mistakenly think the + * node has already been fenced (if the XML_NODE_IS_FENCED attribute was + * previously set, because it won't have been cleared). This could prevent + * actual fencing or allow recurring monitor failures to be cleared too + * soon. Ideally, we wouldn't rely on the CIB for the fenced status. + */ + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL); + free_xml(update); +} + +enum down_opts { + DOWN_KEEP_LRM, + DOWN_ERASE_LRM +}; + +/*! 
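+ * For illustration: the node_state update sent by remote_node_up() above
+ * would look roughly like the following (attribute names abbreviated from
+ * the XML_* constants; values hypothetical):
+ *
+ *   <status>
+ *     <node_state id="remote1" uname="remote1" node_fenced="0"/>
+ *   </status>
+ */
+
+/*!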
+ * \internal + * \brief Handle cluster communication related to pacemaker_remote node leaving + * + * \param[in] node_name Name of lost node + * \param[in] opts Whether to keep or erase LRM history + */ +static void +remote_node_down(const char *node_name, const enum down_opts opts) +{ + xmlNode *update; + int call_opt = crmd_cib_smart_opt(); + crm_node_t *node; + + /* Purge node from attrd's memory */ + update_attrd_remote_node_removed(node_name, NULL); + + /* Normally, only node attributes should be erased, and the resource history + * should be kept until the node comes back up. However, after a successful + * fence, we want to clear the history as well, so we don't think resources + * are still running on the node. + */ + if (opts == DOWN_ERASE_LRM) { + controld_delete_node_state(node_name, controld_section_all, call_opt); + } else { + controld_delete_node_state(node_name, controld_section_attrs, call_opt); + } + + /* Ensure node is in the remote peer cache with lost state */ + node = crm_remote_peer_get(node_name); + CRM_CHECK(node != NULL, return); + pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0); + + /* Notify DC */ + broadcast_remote_state_message(node_name, false); + + /* Update CIB node state */ + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + create_node_state_update(node, node_update_cluster, update, __func__); + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL); + free_xml(update); +} + +/*! + * \internal + * \brief Handle effects of a remote RA command on node state + * + * \param[in] cmd Completed remote RA command + */ +static void +check_remote_node_state(const remote_ra_cmd_t *cmd) +{ + /* Only successful actions can change node state */ + if (!pcmk__result_ok(&(cmd->result))) { + return; + } + + if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + remote_node_up(cmd->rsc_id); + + } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) { + /* After a successful migration, we don't need to do remote_node_up() + * because the DC already knows the node is up, and we don't want to + * clear LRM history etc. We do need to add the remote node to this + * host's remote peer cache, because (unless it happens to be DC) + * it hasn't been tracking the remote node, and other code relies on + * the cache to distinguish remote nodes from unseen cluster nodes. + */ + crm_node_t *node = crm_remote_peer_get(cmd->rsc_id); + + CRM_CHECK(node != NULL, return); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + + } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); + remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; + + if (ra_data) { + if (!pcmk_is_set(ra_data->status, takeover_complete)) { + /* Stop means down if we didn't successfully migrate elsewhere */ + remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); + } else if (AM_I_DC == FALSE) { + /* Only the connection host and DC track node state, + * so if the connection migrated elsewhere and we aren't DC, + * un-cache the node, so we don't have stale info + */ + crm_remote_peer_cache_remove(cmd->rsc_id); + } + } + } + + /* We don't do anything for successful monitors, which is correct for + * routine recurring monitors, and for monitors on nodes where the + * connection isn't supposed to be (the cluster will stop the connection in + * that case). 
However, if the initial probe finds the connection already
+ * active on the node where we want it, we probably should do
+ * remote_node_up(). Unfortunately, we can't distinguish that case here.
+ * Given that connections have to be initiated by the cluster, the chance of
+ * that should be close to zero.
+ */
+}
+
+static void
+report_remote_ra_result(remote_ra_cmd_t * cmd)
+{
+ lrmd_event_data_t op = { 0, };
+
+ check_remote_node_state(cmd);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = cmd->rsc_id;
+ op.op_type = cmd->action;
+ op.user_data = cmd->userdata;
+ op.timeout = cmd->timeout;
+ op.interval_ms = cmd->interval_ms;
+ op.t_run = (unsigned int) cmd->start_time;
+ op.t_rcchange = (unsigned int) cmd->start_time;
+
+ lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
+ cmd->result.exit_reason);
+
+ if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
+ op.t_rcchange = (unsigned int) time(NULL);
+ /* This edge case should rarely, if ever, occur, but if it did, a
+ * failure would not be processed correctly. It is only remotely
+ * possible because we can detect that a connection resource's TCP
+ * connection failed at any moment after start completed, while the
+ * actual recurring operation is just a connectivity ping.
+ *
+ * Basically, we are not guaranteed that a first successful monitor op
+ * and a subsequent failed one won't share the same timestamp, but we
+ * have to make it look like the operations occurred at separate times.
+ */
+ if (op.t_rcchange == op.t_run) {
+ op.t_rcchange++;
+ }
+ }
+
+ if (cmd->params) {
+ lrmd_key_value_t *tmp;
+
+ op.params = pcmk__strkey_table(free, free);
+ for (tmp = cmd->params; tmp; tmp = tmp->next) {
+ g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
+ }
+
+ }
+ op.call_id = cmd->call_id;
+ op.remote_nodename = cmd->owner;
+
+ lrm_op_callback(&op);
+
+ if (op.params) {
+ g_hash_table_destroy(op.params);
+ }
+ lrmd__reset_result(&op);
+}
+
+static void
+update_remaining_timeout(remote_ra_cmd_t * cmd)
+{
+ cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
+}
+
+static gboolean
+retry_start_cmd_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = data;
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ remote_ra_cmd_t *cmd = NULL;
+ int rc = ETIME;
+
+ if (!ra_data || !ra_data->cur_cmd) {
+ return FALSE;
+ }
+ cmd = ra_data->cur_cmd;
+ if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
+ return FALSE;
+ }
+ update_remaining_timeout(cmd);
+
+ if (cmd->remaining_timeout > 0) {
+ rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
+ } else {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Not enough time remains to retry remote connection");
+ }
+
+ if (rc != pcmk_rc_ok) {
+ report_remote_ra_result(cmd);
+
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ ra_data->cur_cmd = NULL;
+ free_cmd(cmd);
+ } else {
+ /* wait for connection event */
+ }
+
+ return FALSE;
+}
+
+
+static gboolean
+connection_takeover_timeout_cb(gpointer data)
+{
+ lrm_state_t *lrm_state = NULL;
+ remote_ra_cmd_t *cmd = data;
+
+ crm_info("Takeover event timed out for node %s", cmd->rsc_id);
+ cmd->takeover_timeout_id = 0;
+
+ lrm_state = lrm_state_find(cmd->rsc_id);
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ free_cmd(cmd);
+
+ return FALSE;
+}
+
+static gboolean
+monitor_timeout_cb(gpointer data)
+{
+
lrm_state_t *lrm_state = NULL; + remote_ra_cmd_t *cmd = data; + + lrm_state = lrm_state_find(cmd->rsc_id); + + crm_info("Timed out waiting for remote poke response from %s%s", + cmd->rsc_id, (lrm_state? "" : " (no LRM state)")); + cmd->monitor_timeout_id = 0; + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, + "Remote executor did not respond"); + + if (lrm_state && lrm_state->remote_ra_data) { + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + if (ra_data->cur_cmd == cmd) { + ra_data->cur_cmd = NULL; + } + if (ra_data->cmds) { + mainloop_set_trigger(ra_data->work); + } + } + + report_remote_ra_result(cmd); + free_cmd(cmd); + + if(lrm_state) { + lrm_state_disconnect(lrm_state); + } + return FALSE; +} + +static void +synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type) +{ + lrmd_event_data_t op = { 0, }; + + if (lrm_state == NULL) { + /* if lrm_state not given assume local */ + lrm_state = lrm_state_find(controld_globals.our_nodename); + } + CRM_ASSERT(lrm_state != NULL); + + op.type = lrmd_event_exec_complete; + op.rsc_id = rsc_id; + op.op_type = op_type; + op.t_run = (unsigned int) time(NULL); + op.t_rcchange = op.t_run; + op.call_id = generate_callid(); + lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + process_lrm_event(lrm_state, &op, NULL, NULL); +} + +void +remote_lrm_op_callback(lrmd_event_data_t * op) +{ + gboolean cmd_handled = FALSE; + lrm_state_t *lrm_state = NULL; + remote_ra_data_t *ra_data = NULL; + remote_ra_cmd_t *cmd = NULL; + + crm_debug("Processing '%s%s%s' event on remote connection to %s: %s " + "(%d) status=%s (%d)", + (op->op_type? op->op_type : ""), (op->op_type? " " : ""), + lrmd_event_type2str(op->type), op->remote_nodename, + services_ocf_exitcode_str(op->rc), op->rc, + pcmk_exec_status_str(op->op_status), op->op_status); + + lrm_state = lrm_state_find(op->remote_nodename); + if (!lrm_state || !lrm_state->remote_ra_data) { + crm_debug("No state information found for remote connection event"); + return; + } + ra_data = lrm_state->remote_ra_data; + + if (op->type == lrmd_event_new_client) { + // Another client has connected to the remote daemon + + if (pcmk_is_set(ra_data->status, expect_takeover)) { + // Great, we knew this was coming + lrm_remote_clear_flags(lrm_state, expect_takeover); + lrm_remote_set_flags(lrm_state, takeover_complete); + + } else { + crm_err("Disconnecting from Pacemaker Remote node %s due to " + "unexpected client takeover", op->remote_nodename); + /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */ + /* Do not free lrm_state->conn yet. */ + /* It'll be freed in the following stop action. 
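+ * For comparison, the expected migration flow is: "migrate_to" sets
+ * expect_takeover; the new client connection flips it to
+ * takeover_complete; and the subsequent "stop" on this node then skips
+ * remote_node_down() (see handle_remote_ra_stop() and
+ * check_remote_node_state()).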
*/
+ lrm_state_disconnect_only(lrm_state);
+ }
+ return;
+ }
+
+ /* filter all EXEC events up */
+ if (op->type == lrmd_event_exec_complete) {
+ if (pcmk_is_set(ra_data->status, takeover_complete)) {
+ crm_debug("Ignoring event; this connection was taken over by another node");
+ } else {
+ lrm_op_callback(op);
+ }
+ return;
+ }
+
+ if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
+
+ if (!pcmk_is_set(ra_data->status, remote_active)) {
+ crm_debug("Disconnection from Pacemaker Remote node %s complete",
+ lrm_state->node_name);
+
+ } else if (!remote_ra_is_in_maintenance(lrm_state)) {
+ crm_err("Lost connection to Pacemaker Remote node %s",
+ lrm_state->node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+
+ } else {
+ crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+ lrm_state->node_name);
+ /* Do roughly what a 'stop' on the remote-resource would do */
+ handle_remote_ra_stop(lrm_state, NULL);
+ remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+ /* now fake the reply of a successful 'stop' */
+ synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
+ }
+ return;
+ }
+
+ if (!ra_data->cur_cmd) {
+ crm_debug("no event to match");
+ return;
+ }
+
+ cmd = ra_data->cur_cmd;
+
+ /* Start actions and migrate from actions complete after connection
+ * comes back to us. */
+ if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
+ "migrate_from", NULL)) {
+ if (op->connection_rc < 0) {
+ update_remaining_timeout(cmd);
+
+ if ((op->connection_rc == -ENOKEY)
+ || (op->connection_rc == -EKEYREJECTED)) {
+ // Hard error, don't retry
+ pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
+ PCMK_EXEC_ERROR,
+ pcmk_strerror(op->connection_rc));
+
+ } else if (cmd->remaining_timeout > 3000) {
+ crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
+ g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
+ return;
+
+ } else {
+ crm_trace("can't reschedule start, remaining timeout too small %d",
+ cmd->remaining_timeout);
+ pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "%s without enough time to retry",
+ pcmk_strerror(op->connection_rc));
+ }
+
+ } else {
+ lrm_state_reset_tables(lrm_state, TRUE);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ lrm_remote_set_flags(lrm_state, remote_active);
+ }
+
+ crm_debug("Remote connection event matched %s action", cmd->action);
+ report_remote_ra_result(cmd);
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+
+ if (cmd->monitor_timeout_id) {
+ g_source_remove(cmd->monitor_timeout_id);
+ cmd->monitor_timeout_id = 0;
+ }
+
+ /* Only report success the first time, after that only worry about failures.
+ * For this function, if we get the poke back, it is always a success. Pokes
+ * only fail if the send fails, or the response times out. */
+ if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ cmd_set_flags(cmd, cmd_reported_success);
+ }
+
+ crm_debug("Remote poke event matched %s action", cmd->action);
+
+ /* Success; keep rescheduling if an interval is present.
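+ * The recurring pattern: on each successful poke reply, the cmd is
+ * parked on ra_data->recurring_cmds and a g_timeout_add(interval) timer
+ * is armed; when it fires, recurring_helper() moves the cmd back to
+ * ra_data->cmds and triggers the work queue, which pokes the connection
+ * again.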
*/
+ if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
+ ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
+ cmd->interval_id = g_timeout_add(cmd->interval_ms,
+ recurring_helper, cmd);
+ cmd = NULL; /* prevent free */
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ if (pcmk_is_set(ra_data->status, remote_active) &&
+ !pcmk_is_set(cmd->status, cmd_cancel)) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR,
+ "Remote connection unexpectedly dropped "
+ "during monitor");
+ report_remote_ra_result(cmd);
+ crm_err("Remote connection to %s unexpectedly dropped during monitor",
+ lrm_state->node_name);
+ }
+ cmd_handled = TRUE;
+
+ } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+
+ handle_remote_ra_stop(lrm_state, cmd);
+ cmd_handled = TRUE;
+
+ } else {
+ crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
+ }
+
+ if (cmd_handled) {
+ ra_data->cur_cmd = NULL;
+ if (ra_data->cmds) {
+ mainloop_set_trigger(ra_data->work);
+ }
+ free_cmd(cmd);
+ }
+}
+
+static void
+handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
+{
+ remote_ra_data_t *ra_data = NULL;
+
+ CRM_ASSERT(lrm_state);
+ ra_data = lrm_state->remote_ra_data;
+
+ if (!pcmk_is_set(ra_data->status, takeover_complete)) {
+ /* delete pending ops whenever the remote connection is intentionally stopped */
+ g_hash_table_remove_all(lrm_state->active_ops);
+ } else {
+ /* we no longer hold the history if this connection has been migrated;
+ * however, we keep the metadata cache for future use */
+ lrm_state_reset_tables(lrm_state, FALSE);
+ }
+
+ lrm_remote_clear_flags(lrm_state, remote_active);
+ lrm_state_disconnect(lrm_state);
+
+ if (ra_data->cmds) {
+ g_list_free_full(ra_data->cmds, free_cmd);
+ }
+ if (ra_data->recurring_cmds) {
+ g_list_free_full(ra_data->recurring_cmds, free_cmd);
+ }
+ ra_data->cmds = NULL;
+ ra_data->recurring_cmds = NULL;
+ ra_data->cur_cmd = NULL;
+
+ if (cmd) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ report_remote_ra_result(cmd);
+ }
+}
+
+// \return Standard Pacemaker return code
+static int
+handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
+{
+ const char *server = NULL;
+ lrmd_key_value_t *tmp = NULL;
+ int port = 0;
+ int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ?
MAX_START_TIMEOUT_MS : timeout_ms; + int rc = pcmk_rc_ok; + + for (tmp = cmd->params; tmp; tmp = tmp->next) { + if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR, + XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) { + server = tmp->value; + } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) { + port = atoi(tmp->value); + } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) { + lrm_remote_set_flags(lrm_state, controlling_guest); + } + } + + rc = controld_connect_remote_executor(lrm_state, server, port, + timeout_used); + if (rc != pcmk_rc_ok) { + pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, + "Could not connect to Pacemaker Remote node %s: %s", + lrm_state->node_name, pcmk_rc_str(rc)); + } + return rc; +} + +static gboolean +handle_remote_ra_exec(gpointer user_data) +{ + int rc = 0; + lrm_state_t *lrm_state = user_data; + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + remote_ra_cmd_t *cmd; + GList *first = NULL; + + if (ra_data->cur_cmd) { + /* still waiting on previous cmd */ + return TRUE; + } + + while (ra_data->cmds) { + first = ra_data->cmds; + cmd = first->data; + if (cmd->delay_id) { + /* still waiting for start delay timer to trip */ + return TRUE; + } + + ra_data->cmds = g_list_remove_link(ra_data->cmds, first); + g_list_free_1(first); + + if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { + lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete); + if (handle_remote_ra_start(lrm_state, cmd, + cmd->timeout) == pcmk_rc_ok) { + /* take care of this later when we get async connection result */ + crm_debug("Initiated async remote connection, %s action will complete after connect event", + cmd->action); + ra_data->cur_cmd = cmd; + return TRUE; + } + report_remote_ra_result(cmd); + + } else if (!strcmp(cmd->action, "monitor")) { + + if (lrm_state_is_connected(lrm_state) == TRUE) { + rc = lrm_state_poke_connection(lrm_state); + if (rc < 0) { + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, pcmk_strerror(rc)); + } + } else { + rc = -1; + pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING, + PCMK_EXEC_DONE, "Remote connection inactive"); + } + + if (rc == 0) { + crm_debug("Poked Pacemaker Remote at node %s, waiting for async response", + cmd->rsc_id); + ra_data->cur_cmd = cmd; + cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); + return TRUE; + } + report_remote_ra_result(cmd); + + } else if (!strcmp(cmd->action, "stop")) { + + if (pcmk_is_set(ra_data->status, expect_takeover)) { + /* briefly wait on stop for the takeover event to occur. If the + * takeover event does not occur during the wait period, that's fine. + * It just means that the remote-node's lrm_status section is going to get + * cleared which will require all the resources running in the remote-node + * to be explicitly re-detected via probe actions. If the takeover does occur + * successfully, then we can leave the status section intact. 
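+ * The wait is bounded: connection_takeover_timeout_cb() fires after half
+ * the stop action's timeout (cmd->timeout/2, armed just below) and
+ * finishes the stop via handle_remote_ra_stop() if no takeover has
+ * happened by then.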
*/ + cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); + ra_data->cur_cmd = cmd; + return TRUE; + } + + handle_remote_ra_stop(lrm_state, cmd); + + } else if (!strcmp(cmd->action, "migrate_to")) { + lrm_remote_clear_flags(lrm_state, takeover_complete); + lrm_remote_set_flags(lrm_state, expect_takeover); + pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + report_remote_ra_result(cmd); + } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD, + CRMD_ACTION_RELOAD_AGENT, NULL)) { + /* Currently the only reloadable parameter is reconnect_interval, + * which is only used by the scheduler via the CIB, so reloads are a + * no-op. + * + * @COMPAT DC <2.1.0: We only need to check for "reload" in case + * we're in a rolling upgrade with a DC scheduling "reload" instead + * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway, + * so this would work for that purpose as well. + */ + pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + report_remote_ra_result(cmd); + } + + free_cmd(cmd); + } + + return TRUE; +} + +static void +remote_ra_data_init(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = NULL; + + if (lrm_state->remote_ra_data) { + return; + } + + ra_data = calloc(1, sizeof(remote_ra_data_t)); + ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); + lrm_state->remote_ra_data = ra_data; +} + +void +remote_ra_cleanup(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + if (!ra_data) { + return; + } + + if (ra_data->cmds) { + g_list_free_full(ra_data->cmds, free_cmd); + } + + if (ra_data->recurring_cmds) { + g_list_free_full(ra_data->recurring_cmds, free_cmd); + } + mainloop_destroy_trigger(ra_data->work); + free(ra_data); + lrm_state->remote_ra_data = NULL; +} + +gboolean +is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) +{ + if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { + return TRUE; + } + if ((id != NULL) && (lrm_state_find(id) != NULL) + && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) { + return TRUE; + } + + return FALSE; +} + +lrmd_rsc_info_t * +remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) +{ + lrmd_rsc_info_t *info = NULL; + + if ((lrm_state_find(rsc_id))) { + info = calloc(1, sizeof(lrmd_rsc_info_t)); + + info->id = strdup(rsc_id); + info->type = strdup(REMOTE_LRMD_RA); + info->standard = strdup(PCMK_RESOURCE_CLASS_OCF); + info->provider = strdup("pacemaker"); + } + + return info; +} + +static gboolean +is_remote_ra_supported_action(const char *action) +{ + return pcmk__str_any_of(action, + CRMD_ACTION_START, + CRMD_ACTION_STOP, + CRMD_ACTION_STATUS, + CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, + CRMD_ACTION_RELOAD_AGENT, + CRMD_ACTION_RELOAD, + NULL); +} + +static GList * +fail_all_monitor_cmds(GList * list) +{ + GList *rm_list = NULL; + remote_ra_cmd_t *cmd = NULL; + GList *gIter = NULL; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + rm_list = g_list_append(rm_list, cmd); + } + } + + for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, "Lost connection to remote executor"); + crm_trace("Pre-emptively failing %s %s (interval=%u, %s)", + cmd->action, cmd->rsc_id, 
cmd->interval_ms, cmd->userdata); + report_remote_ra_result(cmd); + + list = g_list_remove(list, cmd); + free_cmd(cmd); + } + + /* frees only the list data, not the cmds */ + g_list_free(rm_list); + return list; +} + +static GList * +remove_cmd(GList * list, const char *action, guint interval_ms) +{ + remote_ra_cmd_t *cmd = NULL; + GList *gIter = NULL; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) { + break; + } + cmd = NULL; + } + if (cmd) { + list = g_list_remove(list, cmd); + free_cmd(cmd); + } + return list; +} + +int +remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms) +{ + lrm_state_t *connection_rsc = NULL; + remote_ra_data_t *ra_data = NULL; + + connection_rsc = lrm_state_find(rsc_id); + if (!connection_rsc || !connection_rsc->remote_ra_data) { + return -EINVAL; + } + + ra_data = connection_rsc->remote_ra_data; + ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms); + ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, + interval_ms); + if (ra_data->cur_cmd && + (ra_data->cur_cmd->interval_ms == interval_ms) && + (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) { + + cmd_set_flags(ra_data->cur_cmd, cmd_cancel); + } + + return 0; +} + +static remote_ra_cmd_t * +handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, + const char *userdata) +{ + GList *gIter = NULL; + remote_ra_cmd_t *cmd = NULL; + + /* There are three places a potential duplicate monitor operation + * could exist: + * 1. the recurring_cmds list, where the op is waiting for its next interval + * 2. the cmds list, where the op is queued to get executed immediately + * 3. cur_cmd, which means the monitor op is in flight right now + */ + if (interval_ms == 0) { + return NULL; + } + + if (ra_data->cur_cmd && + !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) && + (ra_data->cur_cmd->interval_ms == interval_ms) && + pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) { + + cmd = ra_data->cur_cmd; + goto handle_dup; + } + + for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + goto handle_dup; + } + } + + for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + goto handle_dup; + } + } + + return NULL; + +handle_dup: + + crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT, + cmd->rsc_id, "monitor", interval_ms); + + /* update the userdata */ + if (userdata) { + free(cmd->userdata); + cmd->userdata = strdup(userdata); + } + + /* if we've already reported success, generate a new call id */ + if (pcmk_is_set(cmd->status, cmd_reported_success)) { + cmd->start_time = time(NULL); + cmd->call_id = generate_callid(); + cmd_clear_flags(cmd, cmd_reported_success); + } + + /* If we have an interval_id set, that means we are in the process of + * waiting for this cmd's next interval. Instead of waiting, cancel + * the timer and execute the action immediately */ + if (cmd->interval_id) { + g_source_remove(cmd->interval_id); + cmd->interval_id = 0; + recurring_helper(cmd); + } + + return cmd; +} + +/*!
+ * \internal + * \brief Execute an action using the (internal) ocf:pacemaker:remote agent + * + * \param[in] lrm_state Executor state object for remote connection + * \param[in] rsc_id Connection resource ID + * \param[in] action Action to execute + * \param[in] userdata String to copy and pass to execution callback + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in] timeout_ms Action timeout (in milliseconds) + * \param[in] start_delay_ms Delay (in milliseconds) before executing action + * \param[in,out] params Connection resource parameters + * \param[out] call_id Where to store call ID on success + * + * \return Standard Pacemaker return code + * \note This takes ownership of \p params, which should not be used or freed + * after calling this function. + */ +int +controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, lrmd_key_value_t *params, + int *call_id) +{ + lrm_state_t *connection_rsc = NULL; + remote_ra_cmd_t *cmd = NULL; + remote_ra_data_t *ra_data = NULL; + + CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL) + && (userdata != NULL) && (call_id != NULL), + lrmd_key_value_freeall(params); return EINVAL); + + *call_id = 0; + + if (!is_remote_ra_supported_action(action)) { + lrmd_key_value_freeall(params); + return EOPNOTSUPP; + } + + connection_rsc = lrm_state_find(rsc_id); + if (connection_rsc == NULL) { + lrmd_key_value_freeall(params); + return ENOTCONN; + } + + remote_ra_data_init(connection_rsc); + ra_data = connection_rsc->remote_ra_data; + + cmd = handle_dup_monitor(ra_data, interval_ms, userdata); + if (cmd) { + *call_id = cmd->call_id; + lrmd_key_value_freeall(params); + return pcmk_rc_ok; + } + + cmd = calloc(1, sizeof(remote_ra_cmd_t)); + if (cmd == NULL) { + lrmd_key_value_freeall(params); + return ENOMEM; + } + + cmd->owner = strdup(lrm_state->node_name); + cmd->rsc_id = strdup(rsc_id); + cmd->action = strdup(action); + cmd->userdata = strdup(userdata); + if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL) + || (cmd->userdata == NULL)) { + free_cmd(cmd); + lrmd_key_value_freeall(params); + return ENOMEM; + } + + cmd->interval_ms = interval_ms; + cmd->timeout = timeout_ms; + cmd->start_delay = start_delay_ms; + cmd->params = params; + cmd->start_time = time(NULL); + + cmd->call_id = generate_callid(); + + if (cmd->start_delay) { + cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); + } + + ra_data->cmds = g_list_append(ra_data->cmds, cmd); + mainloop_set_trigger(ra_data->work); + + *call_id = cmd->call_id; + return pcmk_rc_ok; +} + +/*!
+ * \internal + * \brief Immediately fail all monitors of a remote node, if proxied here + * + * \param[in] node_name Name of pacemaker_remote node + */ +void +remote_ra_fail(const char *node_name) +{ + lrm_state_t *lrm_state = lrm_state_find(node_name); + + if (lrm_state && lrm_state_is_connected(lrm_state)) { + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + crm_info("Failing monitors on Pacemaker Remote node %s", node_name); + ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); + ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); + } +} + +/* A guest node fencing implied by host fencing looks like: + * + * <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off" + * on_node="lxc1" on_node_uuid="lxc1"> + * <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1" + * CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/> + * <downed> + * <node id="lxc1"/> + * </downed> + * </pseudo_event> + */ +#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \ + "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \ + "/" XML_CIB_TAG_NODE + +/*! + * \internal + * \brief Check a pseudo-action for Pacemaker Remote node side effects + * + * \param[in,out] xml XML of pseudo-action to check + */ +void +remote_ra_process_pseudo(xmlNode *xml) +{ + xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE); + + if (numXpathResults(search) == 1) { + xmlNode *result = getXpathResult(search, 0); + + /* Normally, we handle the necessary side effects of a guest node stop + * action when reporting the remote agent's result. However, if the stop + * is implied due to fencing, it will be a fencing pseudo-event, and + * there won't be a result to report. Handle that case here. + * + * This will result in a duplicate call to remote_node_down() if the + * guest stop was real instead of implied, but that shouldn't hurt. + * + * There is still one corner case that isn't handled: if a guest node + * isn't running any resources when its host is fenced, it will appear + * to be cleanly stopped, so there will be no pseudo-fence, and our + * peer cache state will be incorrect unless and until the guest is + * recovered. + */ + if (result) { + const char *remote = ID(result); + + if (remote) { + remote_node_down(remote, DOWN_ERASE_LRM); + } + } + } + freeXpathObject(search); +} + +static void +remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance) +{ + xmlNode *update, *state; + int call_opt; + crm_node_t *node; + + call_opt = crmd_cib_smart_opt(); + node = crm_remote_peer_get(lrm_state->node_name); + CRM_CHECK(node != NULL, return); + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + state = create_node_state_update(node, node_update_none, update, + __func__); + crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0"); + if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, + NULL) == pcmk_rc_ok) { + /* TODO: still not 100% sure that async update will succeed ... */ + if (maintenance) { + lrm_remote_set_flags(lrm_state, remote_in_maint); + } else { + lrm_remote_clear_flags(lrm_state, remote_in_maint); + } + } + free_xml(update); +} + +#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ + "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \ + XML_GRAPH_TAG_MAINTENANCE + +/*! 
+ * \internal + * \brief Check a pseudo-action holding updates for maintenance state + * + * \param[in,out] xml XML of pseudo-action to check + */ +void +remote_ra_process_maintenance_nodes(xmlNode *xml) +{ + xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE); + + if (numXpathResults(search) == 1) { + xmlNode *node; + int cnt = 0, cnt_remote = 0; + + for (node = + first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE); + node != NULL; node = pcmk__xml_next(node)) { + lrm_state_t *lrm_state = lrm_state_find(ID(node)); + + cnt++; + if (lrm_state && lrm_state->remote_ra_data && + pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) { + int is_maint; + + cnt_remote++; + pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE), + &is_maint, 0); + remote_ra_maintenance(lrm_state, is_maint); + } + } + crm_trace("Action holds %d nodes (%d remotes found) " + "adjusting maintenance-mode", cnt, cnt_remote); + } + freeXpathObject(search); +} + +gboolean +remote_ra_is_in_maintenance(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + return pcmk_is_set(ra_data->status, remote_in_maint); +} + +gboolean +remote_ra_controlling_guest(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + return pcmk_is_set(ra_data->status, controlling_guest); +} diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c new file mode 100644 index 0000000..912f9a5 --- /dev/null +++ b/daemons/controld/controld_schedulerd.c @@ -0,0 +1,506 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <unistd.h> /* pid_t, sleep, ssize_t */ + +#include <crm/cib.h> +#include <crm/cluster.h> +#include <crm/common/xml.h> +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml_internal.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_schedulerd.h> + +#include <pacemaker-controld.h> + +static void handle_disconnect(void); + +static pcmk_ipc_api_t *schedulerd_api = NULL; + +/*! + * \internal + * \brief Close any scheduler connection and free associated memory + */ +void +controld_shutdown_schedulerd_ipc(void) +{ + controld_clear_fsa_input_flags(R_PE_REQUIRED); + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); + + pcmk_free_ipc_api(schedulerd_api); + schedulerd_api = NULL; +} + +/*! + * \internal + * \brief Save CIB query result to file, raising FSA error + * + * \param[in] msg Ignored + * \param[in] call_id Call ID of CIB query + * \param[in] rc Return code of CIB query + * \param[in,out] output Result of CIB query + * \param[in] user_data Unique identifier for filename + * + * \note This is intended to be called after a scheduler connection fails. 
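* + * The file name is built below as PE_STATE_DIR "/pe-core-<uuid>.bz2", + * where <uuid> is the identifier passed via \p user_data.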
+ */ +static void +save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + const char *id = user_data; + + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + CRM_CHECK(id != NULL, return); + + if (rc == pcmk_ok) { + char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); + + if (write_xml_file(output, filename, TRUE) < 0) { + crm_err("Could not save Cluster Information Base to %s after scheduler crash", + filename); + } else { + crm_notice("Saved Cluster Information Base to %s after scheduler crash", + filename); + } + free(filename); + } +} + +/*! + * \internal + * \brief Respond to scheduler connection failure + */ +static void +handle_disconnect(void) +{ + // If we aren't connected to the scheduler, we can't expect a reply + controld_expect_sched_reply(NULL); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) { + int rc = pcmk_ok; + char *uuid_str = crm_generate_uuid(); + + crm_crit("Connection to the scheduler failed " + CRM_XS " uuid=%s", uuid_str); + + /* + * The scheduler died... + * + * Save the current CIB so that we have a chance of + * figuring out what killed it. + * + * Delay raising the I_ERROR until the query below completes or + * 5s is up, whichever comes first. + * + */ + rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn, + NULL, NULL, + cib_scope_local); + fsa_register_cib_callback(rc, uuid_str, save_cib_contents); + + } else { + crm_info("Connection to the scheduler released"); + } + + controld_clear_fsa_input_flags(R_PE_CONNECTED); + controld_trigger_fsa(); + return; +} + +static void +handle_reply(pcmk_schedulerd_api_reply_t *reply) +{ + const char *msg_ref = NULL; + + if (!AM_I_DC) { + return; + } + + msg_ref = reply->data.graph.reference; + + if (msg_ref == NULL) { + crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); + + } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, + pcmk__str_none)) { + ha_msg_input_t fsa_input; + xmlNode *crm_data_node; + + controld_stop_sched_timer(); + + /* do_te_invoke (which will eventually process the fsa_input we are constructing + * here) requires that fsa_input.xml be non-NULL. That will only happen if + * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the + * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function + * gave us the values we need, we just need to put them into XML. + * + * The name of the top level element here is irrelevant. Nothing checks it. 
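* + * Roughly, assuming the usual values of these constants + * (XML_ATTR_REFERENCE is "reference", F_CRM_TGRAPH_INPUT is + * "crm-tgraph-in", and F_CRM_DATA is "crm_xml"), the constructed + * message looks like: + * + * <dummy-reply reference="pe_calc-..." crm-tgraph-in="..."> + * <crm_xml> (copy of the transition graph) </crm_xml> + * </dummy-reply>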
+ */ + fsa_input.msg = create_xml_node(NULL, "dummy-reply"); + crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref); + crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input); + + crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA); + add_node_copy(crm_data_node, reply->data.graph.tgraph); + register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); + + free_xml(fsa_input.msg); + + } else { + crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); + } +} + +static void +scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data, void *user_data) +{ + pcmk_schedulerd_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_disconnect: + handle_disconnect(); + break; + + case pcmk_ipc_event_reply: + handle_reply(reply); + break; + + default: + break; + } +} + +static bool +new_schedulerd_ipc_connection(void) +{ + int rc; + + controld_set_fsa_input_flags(R_PE_REQUIRED); + + if (schedulerd_api == NULL) { + rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); + + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; + } + } + + pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); + + rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; + } + + controld_set_fsa_input_flags(R_PE_CONNECTED); + return true; +} + +static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, + xmlNode *output, void *user_data); + +/* A_PE_START, A_PE_STOP, O_PE_RESTART */ +void +do_pe_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + if (pcmk_is_set(action, A_PE_STOP)) { + controld_clear_fsa_input_flags(R_PE_REQUIRED); + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); + } + if (pcmk_is_set(action, A_PE_START) + && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { + + if (cur_state == S_STOPPING) { + crm_info("Ignoring request to connect to scheduler while shutting down"); + + } else if (!new_schedulerd_ipc_connection()) { + crm_warn("Could not connect to scheduler"); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + } + } +} + +static int fsa_pe_query = 0; +static mainloop_timer_t *controld_sched_timer = NULL; + +// @TODO Make this a configurable cluster option if there's demand for it +#define SCHED_TIMEOUT_MS (120000) + +/*! + * \internal + * \brief Handle a timeout waiting for scheduler reply + * + * \param[in] user_data Ignored + * + * \return FALSE (indicating that timer should not be restarted) + */ +static gboolean +controld_sched_timeout(gpointer user_data) +{ + if (AM_I_DC) { + /* If this node is the DC but can't communicate with the scheduler, just + * exit (and likely get fenced) so this node doesn't interfere with any + * further DC elections. + * + * @TODO We could try something less drastic first, like disconnecting + * and reconnecting to the scheduler, but something is likely going + * seriously wrong, so perhaps it's better to just fail as quickly as + * possible. 
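* + * With SCHED_TIMEOUT_MS at two minutes, a DC whose scheduler never + * replies exits via crmd_exit(CRM_EX_FATAL) below within that window.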
+ */ + crmd_exit(CRM_EX_FATAL); + } + return FALSE; +} + +void +controld_stop_sched_timer(void) +{ + if ((controld_sched_timer != NULL) + && (controld_globals.fsa_pe_ref != NULL)) { + crm_trace("Stopping timer for scheduler reply %s", + controld_globals.fsa_pe_ref); + } + mainloop_timer_stop(controld_sched_timer); +} + +/*! + * \internal + * \brief Set the scheduler request currently being waited on + * + * \param[in] ref Request to expect reply to (or NULL for none) + * + * \note This function takes ownership of \p ref. + */ +void +controld_expect_sched_reply(char *ref) +{ + if (ref) { + if (controld_sched_timer == NULL) { + controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", + SCHED_TIMEOUT_MS, FALSE, + controld_sched_timeout, + NULL); + } + mainloop_timer_start(controld_sched_timer); + } else { + controld_stop_sched_timer(); + } + free(controld_globals.fsa_pe_ref); + controld_globals.fsa_pe_ref = ref; +} + +/*! + * \internal + * \brief Free the scheduler reply timer + */ +void +controld_free_sched_timer(void) +{ + if (controld_sched_timer != NULL) { + mainloop_timer_del(controld_sched_timer); + controld_sched_timer = NULL; + } +} + +/* A_PE_INVOKE */ +void +do_pe_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + if (AM_I_DC == FALSE) { + crm_err("Not invoking scheduler because not DC: %s", + fsa_action2string(action)); + return; + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_err("Cannot shut down gracefully without the scheduler"); + register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); + + } else { + crm_info("Waiting for the scheduler to connect"); + crmd_fsa_stall(FALSE); + controld_set_fsa_action_flags(A_PE_START); + controld_trigger_fsa(); + } + return; + } + + if (cur_state != S_POLICY_ENGINE) { + crm_notice("Not invoking scheduler because in state %s", + fsa_state2string(cur_state)); + return; + } + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); + + /* start the join from scratch */ + register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); + return; + } + + fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local); + + crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, + fsa_state2string(controld_globals.fsa_state)); + + controld_expect_sched_reply(NULL); + fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback); +} + +static void +force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) +{ + int max = 0; + int lpc = 0; + const char *xpath_base = NULL; + char *xpath_string = NULL; + xmlXPathObjectPtr xpathObj = NULL; + + xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG); + if (xpath_base == NULL) { + crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)"); + return; + } + + xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']", + xpath_base, XML_CIB_TAG_PROPSET, + attr_name); + xpathObj = xpath_search(xml, xpath_string); + max = numXpathResults(xpathObj); + free(xpath_string); + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); + crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); + } + + 
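/* No matching nvpair was found in the configuration, so create one + * in the first cluster property set (CIB_OPTIONS_FIRST) below */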
if(max == 0) { + xmlNode *configuration = NULL; + xmlNode *crm_config = NULL; + xmlNode *cluster_property_set = NULL; + + crm_trace("Creating %s-%s for %s=%s", + CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); + + configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL, + NULL); + if (configuration == NULL) { + configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); + } + + crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL, + NULL); + if (crm_config == NULL) { + crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); + } + + cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET, + NULL, NULL); + if (cluster_property_set == NULL) { + cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); + crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); + } + + xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); + + crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); + crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); + crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); + } + freeXpathObject(xpathObj); +} + +static void +do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + char *ref = NULL; + pid_t watchdog = pcmk__locate_sbd(); + + if (rc != pcmk_ok) { + crm_err("Could not retrieve the Cluster Information Base: %s " + CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + return; + + } else if (call_id != fsa_pe_query) { + crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); + return; + + } else if (!AM_I_DC + || !pcmk_is_set(controld_globals.fsa_input_register, + R_PE_CONNECTED)) { + crm_debug("No need to invoke the scheduler anymore"); + return; + + } else if (controld_globals.fsa_state != S_POLICY_ENGINE) { + crm_debug("Discarding scheduler request in state: %s", + fsa_state2string(controld_globals.fsa_state)); + return; + + /* this callback counts as 1 */ + } else if (num_cib_op_callbacks() > 1) { + crm_debug("Re-asking for the CIB: %d other peer updates still pending", + (num_cib_op_callbacks() - 1)); + sleep(1); + controld_set_fsa_action_flags(A_PE_INVOKE); + controld_trigger_fsa(); + return; + } + + CRM_LOG_ASSERT(output != NULL); + + /* Refresh the remote node cache and the known node cache when the + * scheduler is invoked */ + pcmk__refresh_node_caches_from_cib(output); + + crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid); + pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM, + pcmk_is_set(controld_globals.flags, + controld_has_quorum)); + + force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); + + if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum) + && !crm_have_quorum) { + crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); + } + + rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref)); + + if (rc < 0) { + crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + } else { + CRM_ASSERT(ref != NULL); + controld_expect_sched_reply(ref); + crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, " + "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref, + crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags, + controld_has_quorum))); + } +} diff --git a/daemons/controld/controld_te_actions.c 
b/daemons/controld/controld_te_actions.c new file mode 100644 index 0000000..d8cfcad --- /dev/null +++ b/daemons/controld/controld_te_actions.c @@ -0,0 +1,746 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event() +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/cluster.h> + +#include <pacemaker-internal.h> +#include <pacemaker-controld.h> + +static GHashTable *te_targets = NULL; +void send_rsc_command(pcmk__graph_action_t *action); +static void te_update_job_count(pcmk__graph_action_t *action, int offset); + +static void +te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + action->timer = g_timeout_add(action->timeout + graph->network_delay, + action_timer_callback, (void *) action); + CRM_ASSERT(action->timer != 0); +} + +/*! + * \internal + * \brief Execute a graph pseudo-action + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] pseudo Pseudo-action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo) +{ + const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); + + /* send to peers as well? */ + if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { + GHashTableIter iter; + crm_node_t *node = NULL; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *cmd = NULL; + + if (pcmk__str_eq(controld_globals.our_nodename, node->uname, + pcmk__str_casei)) { + continue; + } + + cmd = create_request(task, pseudo->xml, node->uname, + CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); + send_cluster_message(node, crm_msg_crmd, cmd, FALSE); + free_xml(cmd); + } + + remote_ra_process_maintenance_nodes(pseudo->xml); + } else { + /* Check action for Pacemaker Remote node side effects */ + remote_ra_process_pseudo(pseudo->xml); + } + + crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, + crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY)); + te_action_confirmed(pseudo, graph); + return pcmk_rc_ok; +} + +static int +get_target_rc(pcmk__graph_action_t *action) +{ + int exit_status; + + pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC), + &exit_status, 0); + return exit_status; +} + +/*! 
+ * \internal + * \brief Execute a cluster action from a transition graph + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] action Cluster action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + char *counter = NULL; + xmlNode *cmd = NULL; + gboolean is_local = FALSE; + + const char *id = NULL; + const char *task = NULL; + const char *value = NULL; + const char *on_node = NULL; + const char *router_node = NULL; + + gboolean rc = TRUE; + gboolean no_wait = FALSE; + + id = ID(action->xml); + CRM_CHECK(!pcmk__str_empty(id), return EPROTO); + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + CRM_CHECK(!pcmk__str_empty(task), return EPROTO); + + on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown); + + router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router_node == NULL) { + router_node = on_node; + if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); + + if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) { + router_node = controld_globals.our_nodename; + } + } + } + + if (pcmk__str_eq(router_node, controld_globals.our_nodename, + pcmk__str_casei)) { + is_local = TRUE; + } + + value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); + if (crm_is_true(value)) { + no_wait = TRUE; + } + + crm_info("Handling controller request '%s' (%s on %s)%s%s", + id, task, on_node, (is_local? " locally" : ""), + (no_wait? " without waiting" : "")); + + if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + /* defer until everything else completes */ + crm_info("Controller request '%s' is a local shutdown", id); + graph->completion_action = pcmk__graph_shutdown; + graph->abort_reason = "local shutdown"; + te_action_confirmed(action, graph); + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + crm_node_t *peer = crm_get_peer(0, router_node); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } + + cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); + + counter = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, get_target_rc(action), + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + + rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); + + if (rc == FALSE) { + crm_err("Action %d failed: send", action->id); + return ECOMM; + + } else if (no_wait) { + te_action_confirmed(action, graph); + + } else { + if (action->timeout <= 0) { + crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead", + action->id, task, on_node, action->timeout, graph->network_delay); + action->timeout = (int) graph->network_delay; + } + te_start_action_timer(graph, action); + } + + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Synthesize an executor event for a resource action timeout + * + * \param[in] action Resource action that timed out + * \param[in] target_rc Expected result of action that timed out + * + * Synthesize an executor event for a resource action timeout. (If the executor + * gets a timeout while waiting for a resource action to complete, that will be + * reported via the usual callback. 
This timeout means we didn't hear from the + * executor itself or the controller that relayed the action to the executor.) + * + * \return Newly created executor event for result of \p action + * \note The caller is responsible for freeing the return value using + * lrmd_free_event(). + */ +static lrmd_event_data_t * +synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc) +{ + lrmd_event_data_t *op = NULL; + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *reason = NULL; + char *dynamic_reason = NULL; + + if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) { + reason = "Local executor did not return result in time"; + } else { + const char *router_node = NULL; + + router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router_node == NULL) { + router_node = target; + } + dynamic_reason = crm_strdup_printf("Controller on %s did not return " + "result in time", router_node); + reason = dynamic_reason; + } + + op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT, + PCMK_OCF_UNKNOWN_ERROR, reason); + op->call_id = -1; + op->user_data = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, target_rc, + controld_globals.te_uuid); + free(dynamic_reason); + return op; +} + +static void +controld_record_action_event(pcmk__graph_action_t *action, + lrmd_event_data_t *op) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + xmlNode *state = NULL; + xmlNode *rsc = NULL; + xmlNode *action_rsc = NULL; + + int rc = pcmk_ok; + + const char *rsc_id = NULL; + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + + int target_rc = get_target_rc(action); + + action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); + if (action_rsc == NULL) { + return; + } + + rsc_id = ID(action_rsc); + CRM_CHECK(rsc_id != NULL, + crm_log_xml_err(action->xml, "Bad:action"); return); + +/* + update the CIB + +<node_state id="hadev"> + <lrm> + <lrm_resources> + <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/> +*/ + + state = create_xml_node(NULL, XML_CIB_TAG_STATE); + + crm_xml_add(state, XML_ATTR_ID, target_uuid); + crm_xml_add(state, XML_ATTR_UNAME, target); + + rsc = create_xml_node(state, XML_CIB_TAG_LRM); + crm_xml_add(rsc, XML_ATTR_ID, target_uuid); + + rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); + rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); + crm_xml_add(rsc, XML_ATTR_ID, rsc_id); + + + crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE); + crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); + crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); + + pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target, + __func__); + + rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state, + cib_scope_local); + fsa_register_cib_callback(rc, NULL, cib_action_updated); + free_xml(state); + + crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)", + rc, action->id, task_uuid, target); + pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); +} + +void +controld_record_action_timeout(pcmk__graph_action_t *action) +{ + lrmd_event_data_t *op = NULL; + + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task_uuid = crm_element_value(action->xml, 
XML_LRM_ATTR_TASK_KEY); + + int target_rc = get_target_rc(action); + + crm_warn("%s %d: %s on %s timed out", + crm_element_name(action->xml), action->id, task_uuid, target); + + op = synthesize_timeout_event(action, target_rc); + controld_record_action_event(action, op); + lrmd_free_event(op); +} + +/*! + * \internal + * \brief Execute a resource action from a transition graph + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] action Resource action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + /* never overwrite stop actions in the CIB with + * anything other than completed results + * + * Writing pending stops makes it look like the + * resource is running again + */ + xmlNode *cmd = NULL; + xmlNode *rsc_op = NULL; + + gboolean rc = TRUE; + gboolean no_wait = FALSE; + gboolean is_local = FALSE; + + char *counter = NULL; + const char *task = NULL; + const char *value = NULL; + const char *on_node = NULL; + const char *router_node = NULL; + const char *task_uuid = NULL; + + CRM_ASSERT(action != NULL); + CRM_ASSERT(action->xml != NULL); + + pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed); + on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + + CRM_CHECK(!pcmk__str_empty(on_node), + crm_err("Corrupted command(id=%s) %s: no node", + ID(action->xml), pcmk__s(task, "without task")); + return pcmk_rc_node_unknown); + + rsc_op = action->xml; + task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); + task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); + + if (!router_node) { + router_node = on_node; + } + + counter = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, get_target_rc(action), + controld_globals.te_uuid); + crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); + + if (pcmk__str_eq(router_node, controld_globals.our_nodename, + pcmk__str_casei)) { + is_local = TRUE; + } + + value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); + if (crm_is_true(value)) { + no_wait = TRUE; + } + + crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", + task, task_uuid, (is_local? " locally" : ""), on_node, + (no_wait? " without waiting" : ""), action->id); + + cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, + CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); + + if (is_local) { + /* shortcut local resource commands */ + ha_msg_input_t data = { + .msg = cmd, + .xml = rsc_op, + }; + + fsa_data_t msg = { + .id = 0, + .data = &data, + .data_type = fsa_dt_ha_msg, + .fsa_input = I_NULL, + .fsa_cause = C_FSA_INTERNAL, + .actions = A_LRM_INVOKE, + .origin = __func__, + }; + + do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state, + I_NULL, &msg); + + } else { + rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); + } + + free(counter); + free_xml(cmd); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_executed); + + if (rc == FALSE) { + crm_err("Action %d failed: send", action->id); + return ECOMM; + + } else if (no_wait) { + /* Just mark confirmed. Don't bump the job count only to immediately + * decrement it. 
+ */ + crm_info("Action %d confirmed - no wait", action->id); + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + pcmk__update_graph(controld_globals.transition_graph, action); + trigger_graph(); + + } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + crm_debug("Action %d: %s %s on %s (timeout %dms) was already confirmed.", + action->id, task, task_uuid, on_node, action->timeout); + } else { + if (action->timeout <= 0) { + crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead", + action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); + action->timeout = (int) graph->network_delay; + } + te_update_job_count(action, 1); + te_start_action_timer(graph, action); + } + + return pcmk_rc_ok; +} + +struct te_peer_s +{ + char *name; + int jobs; + int migrate_jobs; +}; + +static void te_peer_free(gpointer p) +{ + struct te_peer_s *peer = p; + + free(peer->name); + free(peer); +} + +void te_reset_job_counts(void) +{ + GHashTableIter iter; + struct te_peer_s *peer = NULL; + + if(te_targets == NULL) { + te_targets = pcmk__strkey_table(NULL, te_peer_free); + } + + g_hash_table_iter_init(&iter, te_targets); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { + peer->jobs = 0; + peer->migrate_jobs = 0; + } +} + +static void +te_update_job_count_on(const char *target, int offset, bool migrate) +{ + struct te_peer_s *r = NULL; + + if(target == NULL || te_targets == NULL) { + return; + } + + r = g_hash_table_lookup(te_targets, target); + if(r == NULL) { + r = calloc(1, sizeof(struct te_peer_s)); + r->name = strdup(target); + g_hash_table_insert(te_targets, r->name, r); + } + + r->jobs += offset; + if(migrate) { + r->migrate_jobs += offset; + } + crm_trace("jobs[%s] = %d", target, r->jobs); +} + +static void +te_update_job_count(pcmk__graph_action_t *action, int offset) +{ + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + + if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) { + /* No limit on these */ + return; + } + + /* If we have a router node, the action is being performed on a + * remote node. For now, we count all actions occurring on a + * remote node against the job list on the cluster node hosting + * the connection resources */ + target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + + if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + + const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); + const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); + + te_update_job_count_on(t1, offset, TRUE); + te_update_job_count_on(t2, offset, TRUE); + return; + } else if (target == NULL) { + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + } + + te_update_job_count_on(target, offset, FALSE); +} + +/*!
+ * \internal + * \brief Check whether a graph action is allowed to be executed on a node + * + * \param[in] graph Transition graph being executed + * \param[in] action Graph action being executed + * \param[in] target Name of node where action should be executed + * + * \return true if action is allowed, otherwise false + */ +static bool +allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action, + const char *target) +{ + int limit = 0; + struct te_peer_s *r = NULL; + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + + if(target == NULL) { + /* No limit on these */ + return true; + + } else if(te_targets == NULL) { + return false; + } + + r = g_hash_table_lookup(te_targets, target); + limit = throttle_get_job_limit(target); + + if(r == NULL) { + r = calloc(1, sizeof(struct te_peer_s)); + r->name = strdup(target); + g_hash_table_insert(te_targets, r->name, r); + } + + if(limit <= r->jobs) { + crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", + target, limit, r->jobs, id); + return false; + + } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { + if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { + crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", + target, graph->migration_limit, r->migrate_jobs, id); + return false; + } + } + + crm_trace("Peer %s has not hit their limit yet: current jobs = %d, limit = %d", + target, r->jobs, limit); + + return true; +} + +/*! + * \internal + * \brief Check whether a graph action is allowed to be executed + * + * \param[in] graph Transition graph being executed + * \param[in] action Graph action being executed + * + * \return true if action is allowed, otherwise false + */ +static bool +graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *target = NULL; + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + + if (action->type != pcmk__rsc_graph_action) { + /* No limit on these */ + return true; + } + + /* If we have a router node, the action is being performed on a + * remote node. For now, we count all actions occurring on a + * remote node against the job list on the cluster node hosting + * the connection resources */ + target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + + if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); + if (!allowed_on_node(graph, action, target)) { + return false; + } + + target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); + + } else if (target == NULL) { + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + } + + return allowed_on_node(graph, action, target); +} + +/*!
+ * \brief Confirm a graph action (and optionally update graph) + * + * \param[in,out] action Action to confirm + * \param[in,out] graph Update and trigger this graph (if non-NULL) + */ +void +te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph) +{ + if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + if ((action->type == pcmk__rsc_graph_action) + && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) { + te_update_job_count(action, -1); + } + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + } + if (graph) { + pcmk__update_graph(graph, action); + trigger_graph(); + } +} + +static pcmk__graph_functions_t te_graph_fns = { + execute_pseudo_action, + execute_rsc_action, + execute_cluster_action, + controld_execute_fence_action, + graph_action_allowed, +}; + +/* + * \internal + * \brief Register the transitioner's graph functions with \p libpacemaker + */ +void +controld_register_graph_functions(void) +{ + pcmk__set_graph_functions(&te_graph_fns); +} + +void +notify_crmd(pcmk__graph_t *graph) +{ + const char *type = "unknown"; + enum crmd_fsa_input event = I_NULL; + + crm_debug("Processing transition completion in state %s", + fsa_state2string(controld_globals.fsa_state)); + + CRM_CHECK(graph->complete, graph->complete = true); + + switch (graph->completion_action) { + case pcmk__graph_wait: + type = "stop"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + event = I_TE_SUCCESS; + } + break; + case pcmk__graph_done: + type = "done"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + event = I_TE_SUCCESS; + } + break; + + case pcmk__graph_restart: + type = "restart"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + if (controld_get_period_transition_timer() > 0) { + controld_stop_transition_timer(); + controld_start_transition_timer(); + } else { + event = I_PE_CALC; + } + + } else if (controld_globals.fsa_state == S_POLICY_ENGINE) { + controld_set_fsa_action_flags(A_PE_INVOKE); + controld_trigger_fsa(); + } + break; + + case pcmk__graph_shutdown: + type = "shutdown"; + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + event = I_STOP; + + } else { + crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); + event = I_TERMINATE; + } + } + + crm_debug("Transition %d status: %s - %s", graph->id, type, + pcmk__s(graph->abort_reason, "unspecified reason")); + + graph->abort_reason = NULL; + graph->completion_action = pcmk__graph_done; + + if (event != I_NULL) { + register_fsa_input(C_FSA_INTERNAL, event, NULL); + } else { + controld_trigger_fsa(); + } +} diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c new file mode 100644 index 0000000..cf9de83 --- /dev/null +++ b/daemons/controld/controld_te_callbacks.c @@ -0,0 +1,689 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <sys/stat.h> + +#include <crm/crm.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> +#include <crm/msg_xml.h> +#include <crm/cluster.h> /* For ONLINESTATUS etc */ + +#include <pacemaker-controld.h> + +void te_update_confirm(const char *event, xmlNode * msg); + +#define RSC_OP_PREFIX "//" XML_TAG_DIFF_ADDED "//" XML_TAG_CIB \ + "//" XML_LRM_TAG_RSC_OP "[@" XML_ATTR_ID "='" + +// An explicit shutdown-lock of 0 means the lock has been cleared +static bool +shutdown_lock_cleared(xmlNode *lrm_resource) +{ + time_t shutdown_lock = 0; + + return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &shutdown_lock) == pcmk_ok) + && (shutdown_lock == 0); +} + +static void +te_update_diff_v1(const char *event, xmlNode *diff) +{ + int lpc, max; + xmlXPathObject *xpathObj = NULL; + GString *rsc_op_xpath = NULL; + + CRM_CHECK(diff != NULL, return); + + pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); + controld_globals.logger_out->message(controld_globals.logger_out, + "xml-patchset", diff); + + if (cib__config_changed_v1(NULL, NULL, &diff)) { + abort_transition(INFINITY, pcmk__graph_restart, "Non-status change", + diff); + goto bail; /* configuration changed */ + } + + /* Tickets Attributes - Added/Updated */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute: update", aborted); + goto bail; + + } + freeXpathObject(xpathObj); + + /* Tickets Attributes - Removed */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute: removal", aborted); + goto bail; + } + freeXpathObject(xpathObj); + + /* Transient Attributes - Removed */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" + XML_TAG_TRANSIENT_NODEATTRS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Transient attribute: removal", aborted); + goto bail; + + } + freeXpathObject(xpathObj); + + // Check for lrm_resource entries + xpathObj = xpath_search(diff, + "//" F_CIB_UPDATE_RESULT + "//" XML_TAG_DIFF_ADDED + "//" XML_LRM_TAG_RESOURCE); + max = numXpathResults(xpathObj); + + /* + * Updates by, or in response to, graph actions will never affect more than + * one resource at a time, so an update affecting multiple resources + * indicates an LRM refresh. In that case, start a new transition rather + * than check each result individually, which can result in _huge_ + * speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation.
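* + * For example, a history refresh rewrites a node's resource history, so + * many lrm_resource entries appear in a single CIB update, whereas + * updates triggered by graph actions touch one resource at a time.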
+ */ + if ((controld_globals.transition_graph->pending == 0) && (max > 1)) { + crm_debug("Ignoring resource operation updates due to history refresh of %d resources", + max); + crm_log_xml_trace(diff, "lrm-refresh"); + abort_transition(INFINITY, pcmk__graph_restart, "History refresh", + NULL); + goto bail; + } + + if (max == 1) { + xmlNode *lrm_resource = getXpathResult(xpathObj, 0); + + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, pcmk__graph_restart, + "Shutdown lock cleared", lrm_resource); + // Still process results, so we stop timers and update failcounts + } + } + freeXpathObject(xpathObj); + + /* Process operation updates */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); + max = numXpathResults(xpathObj); + if (max > 0) { + int lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *rsc_op = getXpathResult(xpathObj, lpc); + const char *node = get_node_id(rsc_op); + + process_graph_event(rsc_op, node); + } + } + freeXpathObject(xpathObj); + + /* Detect deleted (as opposed to replaced or added) actions, e.g. crm_resource -C */ + xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); + max = numXpathResults(xpathObj); + for (lpc = 0; lpc < max; lpc++) { + const char *op_id = NULL; + xmlXPathObject *op_match = NULL; + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match == NULL) { continue; }; + + op_id = ID(match); + + if (rsc_op_xpath == NULL) { + rsc_op_xpath = g_string_new(RSC_OP_PREFIX); + } else { + g_string_truncate(rsc_op_xpath, sizeof(RSC_OP_PREFIX) - 1); + } + pcmk__g_strcat(rsc_op_xpath, op_id, "']", NULL); + + op_match = xpath_search(diff, (const char *) rsc_op_xpath->str); + if (numXpathResults(op_match) == 0) { + /* Prevent false positives by matching cancellations too */ + const char *node = get_node_id(match); + pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node); + + if (cancelled == NULL) { + crm_debug("No match for deleted action %s (%s on %s)", + (const char *) rsc_op_xpath->str, op_id, node); + abort_transition(INFINITY, pcmk__graph_restart, + "Resource op removal", match); + freeXpathObject(op_match); + goto bail; + + } else { + crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", + op_id, node, cancelled->id); + } + } + + freeXpathObject(op_match); + } + + bail: + freeXpathObject(xpathObj); + if (rsc_op_xpath != NULL) { + g_string_free(rsc_op_xpath, TRUE); + } +} + +static void +process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) +{ + for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; + rsc_op = pcmk__xml_next(rsc_op)) { + process_graph_event(rsc_op, node); + } + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared", + lrm_resource); + } +} + +static void +process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, + const char *op, const char *xpath) +{ + xmlNode *rsc = NULL; + + if (xml == NULL) { + return; + } + + if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) { + xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); + CRM_CHECK(xml != NULL, return); + } + + CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return); + + /* + * Updates by, or in response to, TE actions will never contain updates + * for more than one
resource at a time, so an update affecting multiple resources + * indicates an LRM refresh. + * + * In that case, start a new transition rather than check each result + * individually, which can result in _huge_ speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. + */ + if ((controld_globals.transition_graph->pending == 0) + && (xml->children != NULL) && (xml->children->next != NULL)) { + + crm_log_xml_trace(change, "lrm-refresh"); + abort_transition(INFINITY, pcmk__graph_restart, "History refresh", + NULL); + return; + } + + for (rsc = pcmk__xml_first_child(xml); rsc != NULL; + rsc = pcmk__xml_next(rsc)) { + crm_trace("Processing %s", ID(rsc)); + process_lrm_resource_diff(rsc, node); + } +} + +static char *extract_node_uuid(const char *xpath) +{ + char *mutable_path = strdup(xpath); + char *node_uuid = NULL; + char *search = NULL; + char *match = NULL; + + match = strstr(mutable_path, "node_state[@" XML_ATTR_ID "=\'"); + if (match == NULL) { + free(mutable_path); + return NULL; + } + match += strlen("node_state[@" XML_ATTR_ID "=\'"); + + search = strchr(match, '\''); + if (search == NULL) { + free(mutable_path); + return NULL; + } + search[0] = 0; + + node_uuid = strdup(match); + free(mutable_path); + return node_uuid; +} + +static void +abort_unless_down(const char *xpath, const char *op, xmlNode *change, + const char *reason) +{ + char *node_uuid = NULL; + pcmk__graph_action_t *down = NULL; + + if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + return; + } + + node_uuid = extract_node_uuid(xpath); + if(node_uuid == NULL) { + crm_err("Could not extract node ID from %s", xpath); + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + return; + } + + down = match_down_event(node_uuid); + if (down == NULL) { + crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + } else { + crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); + } + free(node_uuid); +} + +static void +process_op_deletion(const char *xpath, xmlNode *change) +{ + char *mutable_key = strdup(xpath); + char *key; + char *node_uuid; + + // Extract the part of xpath between last pair of single quotes + key = strrchr(mutable_key, '\''); + if (key != NULL) { + *key = '\0'; + key = strrchr(mutable_key, '\''); + } + if (key == NULL) { + crm_warn("Ignoring malformed CIB update (resource deletion of %s)", + xpath); + free(mutable_key); + return; + } + ++key; + + node_uuid = extract_node_uuid(xpath); + if (confirm_cancel_action(key, node_uuid) == FALSE) { + abort_transition(INFINITY, pcmk__graph_restart, + "Resource operation removal", change); + } + free(mutable_key); + free(node_uuid); +} + +static void +process_delete_diff(const char *xpath, const char *op, xmlNode *change) +{ + if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { + process_op_deletion(xpath, change); + + } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { + abort_unless_down(xpath, op, change, "Resource state removal"); + + } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { + abort_unless_down(xpath, op, change, "Node state removal"); + + } else { + crm_trace("Ignoring delete of %s", xpath); + } +} + +static void +process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, + const char *xpath) +{ + xmlNode *lrm =
first_named_child(state, XML_CIB_TAG_LRM); + + process_resource_updates(ID(state), lrm, change, op, xpath); +} + +static void +process_status_diff(xmlNode *status, xmlNode *change, const char *op, + const char *xpath) +{ + for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; + state = pcmk__xml_next(state)) { + process_node_state_diff(state, change, op, xpath); + } +} + +static void +process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, + const char *xpath) +{ + xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); + xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); + + if (status) { + process_status_diff(status, change, op, xpath); + } + if (config) { + abort_transition(INFINITY, pcmk__graph_restart, + "Non-status-only change", change); + } +} + +static void +te_update_diff_v2(xmlNode *diff) +{ + crm_log_xml_trace(diff, "Patch:Raw"); + + for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; + change = pcmk__xml_next(change)) { + + xmlNode *match = NULL; + const char *name = NULL; + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + // Possible ops: create, modify, delete, move + const char *op = crm_element_value(change, XML_DIFF_OP); + + // Ignore uninteresting updates + if (op == NULL) { + continue; + + } else if (xpath == NULL) { + crm_trace("Ignoring %s change for version field", op); + continue; + + } else if ((strcmp(op, "move") == 0) + && (strstr(xpath, + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION + "/" XML_CIB_TAG_RESOURCES) == NULL)) { + /* We still need to consider moves within the resources section, + * since they affect placement order. + */ + crm_trace("Ignoring move change at %s", xpath); + continue; + } + + // Find the result of create/modify ops + if (strcmp(op, "create") == 0) { + match = change->children; + + } else if (strcmp(op, "modify") == 0) { + match = first_named_child(change, XML_DIFF_RESULT); + if(match) { + match = match->children; + } + + } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) { + crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", + op, xpath); + continue; + } + + if (match) { + if (match->type == XML_COMMENT_NODE) { + crm_trace("Ignoring %s operation for comment at %s", op, xpath); + continue; + } + name = (const char *)match->name; + } + + crm_trace("Handling %s operation for %s%s%s", + op, (xpath? xpath : "CIB"), + (name? " matched by " : ""), (name? 
name : "")); + + if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { + abort_transition(INFINITY, pcmk__graph_restart, + "Configuration change", change); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) + || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute change", change); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") + || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, + pcmk__str_none)) { + abort_unless_down(xpath, op, change, "Transient attribute change"); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strcmp(op, "delete") == 0) { + process_delete_diff(xpath, op, change); + + } else if (name == NULL) { + crm_warn("Ignoring malformed CIB update (%s at %s has no result)", + op, xpath); + + } else if (strcmp(name, XML_TAG_CIB) == 0) { + process_cib_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { + process_status_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { + process_node_state_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { + process_resource_updates(ID(match), match, change, op, xpath); + + } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_resource_updates(local_node, match, change, op, xpath); + free(local_node); + + } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_lrm_resource_diff(match, local_node); + free(local_node); + + } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_graph_event(match, local_node); + free(local_node); + + } else { + crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", + op, xpath, name); + } + } +} + +void +te_update_diff(const char *event, xmlNode * msg) +{ + xmlNode *diff = NULL; + const char *op = NULL; + int rc = -EINVAL; + int format = 1; + int p_add[] = { 0, 0, 0 }; + int p_del[] = { 0, 0, 0 }; + + CRM_CHECK(msg != NULL, return); + crm_element_value_int(msg, F_CIB_RC, &rc); + + if (controld_globals.transition_graph == NULL) { + crm_trace("No graph"); + return; + + } else if (rc < pcmk_ok) { + crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); + return; + + } else if (controld_globals.transition_graph->complete + && (controld_globals.fsa_state != S_IDLE) + && (controld_globals.fsa_state != S_TRANSITION_ENGINE) + && (controld_globals.fsa_state != S_POLICY_ENGINE)) { + crm_trace("Filter state=%s (complete)", + fsa_state2string(controld_globals.fsa_state)); + return; + } + + op = crm_element_value(msg, F_CIB_OPERATION); + diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + xml_patch_versions(diff, p_add, p_del); + crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, + p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], + fsa_state2string(controld_globals.fsa_state)); + + crm_element_value_int(diff, "format", &format); + switch (format) { + case 1: + te_update_diff_v1(event, diff); + break; + case 2: + te_update_diff_v2(diff); + break; + default: + crm_warn("Ignoring malformed CIB update (unknown patch format %d)", + format); + } + controld_remove_all_outside_events(); +} 
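+
+/* For reference, a format-2 patchset handled by te_update_diff_v2() above
+ * carries one <change> element per modification, using the XML_DIFF_* names
+ * checked there. A resource history update might look roughly like this
+ * (values illustrative):
+ *
+ *   <change operation="modify"
+ *           path="/cib/status/node_state[@id='1']/lrm[@id='1']">
+ *     <change-result> ...updated lrm_resource entries... </change-result>
+ *   </change>
+ *
+ * The operation and path decide whether the transition is aborted outright
+ * or the individual operation results are processed.
+ */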
+
+void
+process_te_message(xmlNode * msg, xmlNode * xml_data)
+{
+    const char *value = NULL;
+    xmlXPathObject *xpathObj = NULL;
+    int nmatches = 0;
+
+    CRM_CHECK(msg != NULL, return);
+
+    // Transition requests must specify transition engine as subsystem
+    value = crm_element_value(msg, F_CRM_SYS_TO);
+    if (pcmk__str_empty(value)
+        || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+        crm_info("Received invalid transition request: subsystem '%s' not '"
+                 CRM_SYSTEM_TENGINE "'", pcmk__s(value, ""));
+        return;
+    }
+
+    // Only the lrm_invoke command is supported as a transition request
+    value = crm_element_value(msg, F_CRM_TASK);
+    if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) {
+        crm_info("Received invalid transition request: command '%s' not '"
+                 CRM_OP_INVOKE_LRM "'", pcmk__s(value, ""));
+        return;
+    }
+
+    // Transition requests must be marked as coming from the executor
+    value = crm_element_value(msg, F_CRM_SYS_FROM);
+    if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+        crm_info("Received invalid transition request: from '%s' not '"
+                 CRM_SYSTEM_LRMD "'", pcmk__s(value, ""));
+        return;
+    }
+
+    crm_debug("Processing transition request with ref='%s' origin='%s'",
+              pcmk__s(crm_element_value(msg, F_CRM_REFERENCE), ""),
+              pcmk__s(crm_element_value(msg, F_ORIG), ""));
+
+    xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
+    nmatches = numXpathResults(xpathObj);
+    if (nmatches == 0) {
+        crm_err("Received transition request with no results (bug?)");
+    } else {
+        for (int lpc = 0; lpc < nmatches; lpc++) {
+            xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
+            const char *node = get_node_id(rsc_op);
+
+            process_graph_event(rsc_op, node);
+        }
+    }
+    freeXpathObject(xpathObj);
+}
+
+void
+cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+    if (rc < pcmk_ok) {
+        crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
+    }
+}
+
+/*!
+ * \brief Handle a timeout in node-to-node communication
+ *
+ * \param[in,out] data  Pointer to graph action
+ *
+ * \return FALSE (indicating that the source should not be re-added)
+ */
+gboolean
+action_timer_callback(gpointer data)
+{
+    pcmk__graph_action_t *action = (pcmk__graph_action_t *) data;
+    const char *task = NULL;
+    const char *on_node = NULL;
+    const char *via_node = NULL;
+
+    CRM_CHECK(data != NULL, return FALSE);
+
+    stop_te_timer(action);
+
+    task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+    on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+    via_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+    if (controld_globals.transition_graph->complete) {
+        crm_notice("Node %s did not send %s result (via %s) within %dms "
+                   "(ignoring because transition not in progress)",
+                   (on_node? on_node : ""), (task? task : "unknown action"),
+                   (via_node? via_node : "controller"), action->timeout);
+    } else {
+        /* fail the action */
+
+        crm_err("Node %s did not send %s result (via %s) within %dms "
+                "(action timeout plus cluster-delay)",
+                (on_node? on_node : ""), (task? task : "unknown action"),
+                (via_node? 
via_node : "controller"), + (action->timeout + + controld_globals.transition_graph->network_delay)); + pcmk__log_graph_action(LOG_ERR, action); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + + te_action_confirmed(action, controld_globals.transition_graph); + abort_transition(INFINITY, pcmk__graph_restart, "Action lost", NULL); + + // Record timeout in the CIB if appropriate + if ((action->type == pcmk__rsc_graph_action) + && controld_action_is_recordable(task)) { + controld_record_action_timeout(action); + } + } + + return FALSE; +} diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c new file mode 100644 index 0000000..d4e2b0f --- /dev/null +++ b/daemons/controld/controld_te_events.c @@ -0,0 +1,601 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +#include <crm/common/attrd_internal.h> +#include <crm/common/ipc_attrd_internal.h> + +/*! + * \internal + * \brief Action numbers of outside events processed in current update diff + * + * This table is to be used as a set. It should be empty when the transitioner + * begins processing a CIB update diff. It ensures that if there are multiple + * events (for example, "_last_0" and "_last_failure_0") for the same action, + * only one of them updates the failcount. Events that originate outside the + * cluster can't be confirmed, since they're not in the transition graph. + */ +static GHashTable *outside_events = NULL; + +/*! + * \internal + * \brief Empty the hash table containing action numbers of outside events + */ +void +controld_remove_all_outside_events(void) +{ + if (outside_events != NULL) { + g_hash_table_remove_all(outside_events); + } +} + +/*! + * \internal + * \brief Destroy the hash table containing action numbers of outside events + */ +void +controld_destroy_outside_events_table(void) +{ + if (outside_events != NULL) { + g_hash_table_destroy(outside_events); + outside_events = NULL; + } +} + +/*! + * \internal + * \brief Add an outside event's action number to a set + * + * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the + * event was not already in the set, or \p pcmk_rc_already otherwise. 
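+ *
+ * \note process_graph_event() records events from outside the cluster here
+ *       so that when one CIB diff carries two history entries for the same
+ *       action (for example "_last_0" and "_last_failure_0"), only the
+ *       first one updates the fail count.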
+ */ +static int +record_outside_event(gint action_num) +{ + if (outside_events == NULL) { + outside_events = g_hash_table_new(NULL, NULL); + } + + if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) { + return pcmk_rc_ok; + } + return pcmk_rc_already; +} + +gboolean +fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) +{ + const char *target_uuid = NULL; + const char *router = NULL; + const char *router_uuid = NULL; + xmlNode *last_action = NULL; + + GList *gIter = NULL; + GList *gIter2 = NULL; + + if (graph == NULL || graph->complete) { + return FALSE; + } + + gIter = graph->synapses; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; + + if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) { + /* We've already been here */ + continue; + } + + gIter2 = synapse->actions; + for (; gIter2 != NULL; gIter2 = gIter2->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; + + if ((action->type == pcmk__pseudo_graph_action) + || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + continue; + } else if (action->type == pcmk__cluster_graph_action) { + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + + if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + continue; + } + } + + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { + crm_node_t *node = crm_get_peer(0, router); + if (node) { + router_uuid = node->uuid; + } + } + + if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) { + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + pcmk__set_synapse_flags(synapse, pcmk__synapse_failed); + last_action = action->xml; + stop_te_timer(action); + pcmk__update_graph(graph, action); + + if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + crm_notice("Action %d (%s) was pending on %s (offline)", + action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); + } else { + crm_info("Action %d (%s) is scheduled for %s (offline)", + action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); + } + } + } + } + + if (last_action != NULL) { + crm_info("Node %s shutdown resulted in un-runnable actions", down_node); + abort_transition(INFINITY, pcmk__graph_restart, "Node failure", + last_action); + return TRUE; + } + + return FALSE; +} + +/*! 
+ * \internal + * \brief Update failure-related node attributes if warranted + * + * \param[in] event XML describing operation that (maybe) failed + * \param[in] event_node_uuid Node that event occurred on + * \param[in] rc Actual operation return code + * \param[in] target_rc Expected operation return code + * \param[in] do_update If TRUE, do update regardless of operation type + * \param[in] ignore_failures If TRUE, update last failure but not fail count + * + * \return TRUE if this was not a direct nack, success or lrm status refresh + */ +static gboolean +update_failcount(const xmlNode *event, const char *event_node_uuid, int rc, + int target_rc, gboolean do_update, gboolean ignore_failures) +{ + guint interval_ms = 0; + + char *task = NULL; + char *rsc_id = NULL; + + const char *value = NULL; + const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); + const char *on_uname = crm_peer_uname(event_node_uuid); + const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); + + // Nothing needs to be done for success or status refresh + if (rc == target_rc) { + return FALSE; + } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) { + crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", + id, rc, on_uname); + return FALSE; + } + + /* Sanity check */ + CRM_CHECK(on_uname != NULL, return TRUE); + CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms), + crm_err("Couldn't parse: %s", ID(event)); goto bail); + + /* Decide whether update is necessary and what value to use */ + if ((interval_ms > 0) + || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none) + || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) { + do_update = TRUE; + + } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) { + do_update = TRUE; + value = pcmk__s(controld_globals.transition_graph->failed_start_offset, + CRM_INFINITY_S); + + } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) { + do_update = TRUE; + value = pcmk__s(controld_globals.transition_graph->failed_stop_offset, + CRM_INFINITY_S); + } + + if (do_update) { + pcmk__attrd_query_pair_t *fail_pair = NULL; + pcmk__attrd_query_pair_t *last_pair = NULL; + char *fail_name = NULL; + char *last_name = NULL; + GList *attrs = NULL; + + uint32_t opts = pcmk__node_attr_none; + + char *now = pcmk__ttoa(time(NULL)); + + // Fail count will be either incremented or set to infinity + if (!pcmk_str_is_infinity(value)) { + value = XML_NVPAIR_ATTR_VALUE "++"; + } + + if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) { + opts |= pcmk__node_attr_remote; + } + + crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)", + (ignore_failures? "last failure" : "failcount"), + rsc_id, on_uname, task, rc, value, now); + + /* Update the fail count, if we're not ignoring failures */ + if (!ignore_failures) { + fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t)); + CRM_ASSERT(fail_pair != NULL); + + fail_name = pcmk__failcount_name(rsc_id, task, interval_ms); + fail_pair->name = fail_name; + fail_pair->value = value; + fail_pair->node = on_uname; + + attrs = g_list_prepend(attrs, fail_pair); + } + + /* Update the last failure time (even if we're ignoring failures, + * so that failure can still be detected and shown, e.g. 
by crm_mon) + */ + last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t)); + CRM_ASSERT(last_pair != NULL); + + last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms); + last_pair->name = last_name; + last_pair->value = now; + last_pair->node = on_uname; + + attrs = g_list_prepend(attrs, last_pair); + + update_attrd_list(attrs, opts); + + free(fail_name); + free(fail_pair); + + free(last_name); + free(last_pair); + g_list_free(attrs); + + free(now); + } + + bail: + free(rsc_id); + free(task); + return TRUE; +} + +pcmk__graph_action_t * +controld_get_action(int id) +{ + for (GList *item = controld_globals.transition_graph->synapses; + item != NULL; item = item->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data; + + for (GList *item2 = synapse->actions; item2; item2 = item2->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data; + + if (action->id == id) { + return action; + } + } + } + return NULL; +} + +pcmk__graph_action_t * +get_cancel_action(const char *id, const char *node) +{ + GList *gIter = NULL; + GList *gIter2 = NULL; + + gIter = controld_globals.transition_graph->synapses; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; + + gIter2 = synapse->actions; + for (; gIter2 != NULL; gIter2 = gIter2->next) { + const char *task = NULL; + const char *target = NULL; + pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) { + continue; + } + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + if (!pcmk__str_eq(task, id, pcmk__str_casei)) { + crm_trace("Wrong key %s for %s on %s", task, id, node); + continue; + } + + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) { + crm_trace("Wrong node %s for %s on %s", target, id, node); + continue; + } + + crm_trace("Found %s on %s", id, node); + return action; + } + } + + return NULL; +} + +bool +confirm_cancel_action(const char *id, const char *node_id) +{ + const char *op_key = NULL; + const char *node_name = NULL; + pcmk__graph_action_t *cancel = get_cancel_action(id, node_id); + + if (cancel == NULL) { + return FALSE; + } + op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY); + node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET); + + stop_te_timer(cancel); + te_action_confirmed(cancel, controld_globals.transition_graph); + + crm_info("Cancellation of %s on %s confirmed (action %d)", + op_key, node_name, cancel->id); + return TRUE; +} + +/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */ +#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \ + "/" XML_CIB_TAG_NODE "[@" XML_ATTR_ID "='%s']" + +/*! 
+ * \brief Find a transition event that would have made a specified node down + * + * \param[in] target UUID of node to match + * + * \return Matching event if found, NULL otherwise + */ +pcmk__graph_action_t * +match_down_event(const char *target) +{ + pcmk__graph_action_t *match = NULL; + xmlXPathObjectPtr xpath_ret = NULL; + GList *gIter, *gIter2; + + char *xpath = crm_strdup_printf(XPATH_DOWNED, target); + + for (gIter = controld_globals.transition_graph->synapses; + gIter != NULL && match == NULL; + gIter = gIter->next) { + + for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions; + gIter2 != NULL && match == NULL; + gIter2 = gIter2->next) { + + match = (pcmk__graph_action_t *) gIter2->data; + if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) { + xpath_ret = xpath_search(match->xml, xpath); + if (numXpathResults(xpath_ret) < 1) { + match = NULL; + } + freeXpathObject(xpath_ret); + } else { + // Only actions that were actually started can match + match = NULL; + } + } + } + + free(xpath); + + if (match != NULL) { + crm_debug("Shutdown action %d (%s) found for node %s", match->id, + crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); + } else { + crm_debug("No reason to expect node %s to be down", target); + } + return match; +} + +void +process_graph_event(xmlNode *event, const char *event_node) +{ + int rc = -1; // Actual result + int target_rc = -1; // Expected result + int status = -1; // Executor status + int callid = -1; // Executor call ID + int transition_num = -1; // Transition number + int action_num = -1; // Action number within transition + char *update_te_uuid = NULL; + bool ignore_failures = FALSE; + const char *id = NULL; + const char *desc = NULL; + const char *magic = NULL; + const char *uname = NULL; + + CRM_ASSERT(event != NULL); + +/* +<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/> +*/ + + magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); + if (magic == NULL) { + /* non-change */ + return; + } + + crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); + if (status == PCMK_EXEC_PENDING) { + return; + } + + id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); + crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); + crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); + + rc = pcmk__effective_rc(rc); + + if (decode_transition_key(magic, &update_te_uuid, &transition_num, + &action_num, &target_rc) == FALSE) { + // decode_transition_key() already logged the bad key + crm_err("Can't process action %s result: Incompatible versions? " + CRM_XS " call-id=%d", id, callid); + abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event); + return; + } + + if (transition_num == -1) { + // E.g. 
crm_resource --fail + if (record_outside_event(action_num) != pcmk_rc_ok) { + crm_debug("Outside event with transition key '%s' has already been " + "processed", magic); + goto bail; + } + desc = "initiated outside of the cluster"; + abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event", + event); + + } else if ((action_num < 0) + || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid, + pcmk__str_none)) { + desc = "initiated by a different DC"; + abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event); + + } else if ((controld_globals.transition_graph->id != transition_num) + || controld_globals.transition_graph->complete) { + + // Action is not from currently active transition + + guint interval_ms = 0; + + if (parse_op_key(id, NULL, NULL, &interval_ms) + && (interval_ms != 0)) { + /* Recurring actions have the transition number they were first + * scheduled in. + */ + + if (status == PCMK_EXEC_CANCELLED) { + confirm_cancel_action(id, get_node_id(event)); + goto bail; + } + + desc = "arrived after initial scheduling"; + abort_transition(INFINITY, pcmk__graph_restart, + "Change in recurring result", event); + + } else if (controld_globals.transition_graph->id != transition_num) { + desc = "arrived really late"; + abort_transition(INFINITY, pcmk__graph_restart, "Old event", event); + } else { + desc = "arrived late"; + abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph", + event); + } + + } else { + // Event is result of an action from currently active transition + pcmk__graph_action_t *action = controld_get_action(action_num); + + if (action == NULL) { + // Should never happen + desc = "unknown"; + abort_transition(INFINITY, pcmk__graph_restart, "Unknown event", + event); + + } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + /* Nothing further needs to be done if the action has already been + * confirmed. This can happen e.g. when processing both an + * "xxx_last_0" or "xxx_last_failure_0" record as well as the main + * history record, which would otherwise result in incorrectly + * bumping the fail count twice. + */ + crm_log_xml_debug(event, "Event already confirmed:"); + goto bail; + + } else { + /* An action result needs to be confirmed. + * (This is the only case where desc == NULL.) 
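+             * Leaving desc NULL routes the logging at the end of this
+             * function to the "confirmed" or "expected X but got Y"
+             * messages rather than the abort descriptions.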
+ */ + + if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) { + ignore_failures = TRUE; + + } else if (rc != target_rc) { + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + } + + stop_te_timer(action); + te_action_confirmed(action, controld_globals.transition_graph); + + if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) { + abort_transition(action->synapse->priority + 1, + pcmk__graph_restart, "Event failed", event); + } + } + } + + if (id == NULL) { + id = "unknown action"; + } + uname = crm_element_value(event, XML_LRM_ATTR_TARGET); + if (uname == NULL) { + uname = "unknown node"; + } + + if (status == PCMK_EXEC_INVALID) { + // We couldn't attempt the action + crm_info("Transition %d action %d (%s on %s): %s", + transition_num, action_num, id, uname, + pcmk_exec_status_str(status)); + + } else if (desc && update_failcount(event, event_node, rc, target_rc, + (transition_num == -1), FALSE)) { + crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " + CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(target_rc), + services_ocf_exitcode_str(rc), + target_rc, rc, callid, desc); + + } else if (desc) { + crm_info("Transition %d action %d (%s on %s): %s " + CRM_XS " rc=%d target-rc=%d call-id=%d", + transition_num, action_num, id, uname, + desc, rc, target_rc, callid); + + } else if (rc == target_rc) { + crm_info("Transition %d action %d (%s on %s) confirmed: %s " + CRM_XS " rc=%d call-id=%d", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(rc), rc, callid); + + } else { + update_failcount(event, event_node, rc, target_rc, + (transition_num == -1), ignore_failures); + crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " + CRM_XS " target-rc=%d rc=%d call-id=%d", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(target_rc), + services_ocf_exitcode_str(rc), + target_rc, rc, callid); + } + + bail: + free(update_te_uuid); +} diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c new file mode 100644 index 0000000..ecbc0b2 --- /dev/null +++ b/daemons/controld/controld_te_utils.c @@ -0,0 +1,367 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +//! 
Triggers transition graph processing +static crm_trigger_t *transition_trigger = NULL; + +gboolean +stop_te_timer(pcmk__graph_action_t *action) +{ + if (action == NULL) { + return FALSE; + } + if (action->timer != 0) { + crm_trace("Stopping action timer"); + g_source_remove(action->timer); + action->timer = 0; + } else { + crm_trace("Action timer was already stopped"); + return FALSE; + } + return TRUE; +} + +static gboolean +te_graph_trigger(gpointer user_data) +{ + if (controld_globals.transition_graph == NULL) { + crm_debug("Nothing to do"); + return TRUE; + } + + crm_trace("Invoking graph %d in state %s", + controld_globals.transition_graph->id, + fsa_state2string(controld_globals.fsa_state)); + + switch (controld_globals.fsa_state) { + case S_STARTING: + case S_PENDING: + case S_NOT_DC: + case S_HALT: + case S_ILLEGAL: + case S_STOPPING: + case S_TERMINATE: + return TRUE; + default: + break; + } + + if (!controld_globals.transition_graph->complete) { + enum pcmk__graph_status graph_rc; + int orig_limit = controld_globals.transition_graph->batch_limit; + int throttled_limit = throttle_get_total_job_limit(orig_limit); + + controld_globals.transition_graph->batch_limit = throttled_limit; + graph_rc = pcmk__execute_graph(controld_globals.transition_graph); + controld_globals.transition_graph->batch_limit = orig_limit; + + if (graph_rc == pcmk__graph_active) { + crm_trace("Transition not yet complete"); + return TRUE; + + } else if (graph_rc == pcmk__graph_pending) { + crm_trace("Transition not yet complete - no actions fired"); + return TRUE; + } + + if (graph_rc != pcmk__graph_complete) { + crm_warn("Transition failed: %s", + pcmk__graph_status2text(graph_rc)); + pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph); + } + } + + crm_debug("Transition %d is now complete", + controld_globals.transition_graph->id); + controld_globals.transition_graph->complete = true; + notify_crmd(controld_globals.transition_graph); + + return TRUE; +} + +/*! + * \internal + * \brief Initialize transition trigger + */ +void +controld_init_transition_trigger(void) +{ + transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, + NULL); +} + +/*! + * \internal + * \brief Destroy transition trigger + */ +void +controld_destroy_transition_trigger(void) +{ + mainloop_destroy_trigger(transition_trigger); + transition_trigger = NULL; +} + +void +controld_trigger_graph_as(const char *fn, int line) +{ + crm_trace("%s:%d - Triggered graph processing", fn, line); + mainloop_set_trigger(transition_trigger); +} + +static struct abort_timer_s { + bool aborted; + guint id; + int priority; + enum pcmk__graph_next action; + const char *text; +} abort_timer = { 0, }; + +static gboolean +abort_timer_popped(gpointer data) +{ + if (AM_I_DC && (abort_timer.aborted == FALSE)) { + abort_transition(abort_timer.priority, abort_timer.action, + abort_timer.text, NULL); + } + abort_timer.id = 0; + return FALSE; // do not immediately reschedule timer +} + +/*! 
+ * \internal + * \brief Abort transition after delay, if not already aborted in that time + * + * \param[in] abort_text Must be literal string + */ +void +abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, guint delay_ms) +{ + if (abort_timer.id) { + // Timer already in progress, stop and reschedule + g_source_remove(abort_timer.id); + } + abort_timer.aborted = FALSE; + abort_timer.priority = abort_priority; + abort_timer.action = abort_action; + abort_timer.text = abort_text; + abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); +} + +static const char * +abort2text(enum pcmk__graph_next abort_action) +{ + switch (abort_action) { + case pcmk__graph_done: return "done"; + case pcmk__graph_wait: return "stop"; + case pcmk__graph_restart: return "restart"; + case pcmk__graph_shutdown: return "shutdown"; + } + return "unknown"; +} + +static bool +update_abort_priority(pcmk__graph_t *graph, int priority, + enum pcmk__graph_next action, const char *abort_reason) +{ + bool change = FALSE; + + if (graph == NULL) { + return change; + } + + if (graph->abort_priority < priority) { + crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority); + graph->abort_priority = priority; + if (graph->abort_reason != NULL) { + crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason); + } + graph->abort_reason = abort_reason; + change = TRUE; + } + + if (graph->completion_action < action) { + crm_debug("Abort action %s superseded by %s: %s", + abort2text(graph->completion_action), abort2text(action), abort_reason); + graph->completion_action = action; + change = TRUE; + } + + return change; +} + +void +abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, const xmlNode *reason, + const char *fn, int line) +{ + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + int level = LOG_INFO; + const xmlNode *diff = NULL; + const xmlNode *change = NULL; + + CRM_CHECK(controld_globals.transition_graph != NULL, return); + + switch (controld_globals.fsa_state) { + case S_STARTING: + case S_PENDING: + case S_NOT_DC: + case S_HALT: + case S_ILLEGAL: + case S_STOPPING: + case S_TERMINATE: + crm_info("Abort %s suppressed: state=%s (%scomplete)", + abort_text, fsa_state2string(controld_globals.fsa_state), + (controld_globals.transition_graph->complete? 
"" : "in")); + return; + default: + break; + } + + abort_timer.aborted = TRUE; + controld_expect_sched_reply(NULL); + + if (!controld_globals.transition_graph->complete + && update_abort_priority(controld_globals.transition_graph, + abort_priority, abort_action, + abort_text)) { + level = LOG_NOTICE; + } + + if (reason != NULL) { + const xmlNode *search = NULL; + + for(search = reason; search; search = search->parent) { + if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) { + diff = search; + break; + } + } + + if(diff) { + xml_patch_versions(diff, add, del); + for(search = reason; search; search = search->parent) { + if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) { + change = search; + break; + } + } + } + } + + if (reason == NULL) { + do_crm_log(level, + "Transition %d aborted: %s " CRM_XS " source=%s:%d " + "complete=%s", controld_globals.transition_graph->id, + abort_text, fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if(change == NULL) { + GString *local_path = pcmk__element_xpath(reason); + CRM_ASSERT(local_path != NULL); + + do_crm_log(level, "Transition %d aborted by %s.%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, TYPE(reason), + ID(reason), abort_text, add[0], add[1], add[2], fn, line, + (const char *) local_path->str, + pcmk__btoa(controld_globals.transition_graph->complete)); + g_string_free(local_path, TRUE); + + } else { + const char *kind = NULL; + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *path = crm_element_value(change, XML_DIFF_PATH); + + if(change == reason) { + if(strcmp(op, "create") == 0) { + reason = reason->children; + + } else if(strcmp(op, "modify") == 0) { + reason = first_named_child(reason, XML_DIFF_RESULT); + if(reason) { + reason = reason->children; + } + } + } + + kind = TYPE(reason); + if(strcmp(op, "delete") == 0) { + const char *shortpath = strrchr(path, '/'); + + do_crm_log(level, "Transition %d aborted by deletion of %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + (shortpath? 
(shortpath + 1) : path), abort_text, + add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_none)) { + do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + crm_element_value(reason, XML_ATTR_ID), op, + crm_element_value(reason, XML_NVPAIR_ATTR_NAME), + crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), + abort_text, add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_none)) { + const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); + + do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s " + CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s", + controld_globals.transition_graph->id, + crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, + crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, + magic, add[0], add[1], add[2], fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) { + const char *uname = crm_peer_uname(ID(reason)); + + do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", + controld_globals.transition_graph->id, + kind, op, (uname? uname : ID(reason)), abort_text, + add[0], add[1], add[2], fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else { + const char *id = ID(reason); + + do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + TYPE(reason), (id? id : ""), (op? op : "change"), + abort_text, add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + } + } + + if (controld_globals.transition_graph->complete) { + if (controld_get_period_transition_timer() > 0) { + controld_stop_transition_timer(); + controld_start_transition_timer(); + } else { + register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); + } + return; + } + + trigger_graph(); +} diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c new file mode 100644 index 0000000..5b7f9c0 --- /dev/null +++ b/daemons/controld/controld_throttle.c @@ -0,0 +1,574 @@ +/* + * Copyright 2013-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/types.h> +#include <sys/stat.h> + +#include <unistd.h> +#include <ctype.h> +#include <dirent.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/cluster.h> + +#include <pacemaker-controld.h> + +/* These values don't need to be bits, but these particular values must be kept + * for backward compatibility during rolling upgrades. 
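+ *
+ * The values also increase with severity, which the "cpu_load > mode"
+ * comparison in throttle_mode() relies on when picking the higher of the
+ * CIB-load and CPU-load modes.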
+ */ +enum throttle_state_e { + throttle_none = 0x0000, + throttle_low = 0x0001, + throttle_med = 0x0010, + throttle_high = 0x0100, + throttle_extreme = 0x1000, +}; + +struct throttle_record_s { + int max; + enum throttle_state_e mode; + char *node; +}; + +static int throttle_job_max = 0; +static float throttle_load_target = 0.0; + +#define THROTTLE_FACTOR_LOW 1.2 +#define THROTTLE_FACTOR_MEDIUM 1.6 +#define THROTTLE_FACTOR_HIGH 2.0 + +static GHashTable *throttle_records = NULL; +static mainloop_timer_t *throttle_timer = NULL; + +static const char * +load2str(enum throttle_state_e mode) +{ + switch (mode) { + case throttle_extreme: return "extreme"; + case throttle_high: return "high"; + case throttle_med: return "medium"; + case throttle_low: return "low"; + case throttle_none: return "negligible"; + default: return "undetermined"; + } +} + +#if HAVE_LINUX_PROCFS +/*! + * \internal + * \brief Return name of /proc file containing the CIB daemon's load statistics + * + * \return Newly allocated memory with file name on success, NULL otherwise + * + * \note It is the caller's responsibility to free the return value. + * This will return NULL if the daemon is being run via valgrind. + * This should be called only on Linux systems. + */ +static char * +find_cib_loadfile(void) +{ + pid_t pid = pcmk__procfs_pid_of("pacemaker-based"); + + return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL; +} + +static bool +throttle_cib_load(float *load) +{ +/* + /proc/[pid]/stat + Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c. + + The fields, in order, with their proper scanf(3) format specifiers, are: + + pid %d (1) The process ID. + + comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out. + + state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging. + + ppid %d (4) The PID of the parent. + + pgrp %d (5) The process group ID of the process. + + session %d (6) The session ID of the process. + + tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.) + + tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process. + + flags %u (%lu before Linux 2.6.22) + (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version. + + minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk. + + cminflt %lu (11) The number of minor faults that the process's waited-for children have made. + + majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk. + + cmajflt %lu (13) The number of major faults that the process's waited-for children have made. + + utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations. 
+ + stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). + */ + + static char *loadfile = NULL; + static time_t last_call = 0; + static long ticks_per_s = 0; + static unsigned long last_utime, last_stime; + + char buffer[64*1024]; + FILE *stream = NULL; + time_t now = time(NULL); + + if(load == NULL) { + return FALSE; + } else { + *load = 0.0; + } + + if(loadfile == NULL) { + last_call = 0; + last_utime = 0; + last_stime = 0; + loadfile = find_cib_loadfile(); + if (loadfile == NULL) { + crm_warn("Couldn't find CIB load file"); + return FALSE; + } + ticks_per_s = sysconf(_SC_CLK_TCK); + crm_trace("Found %s", loadfile); + } + + stream = fopen(loadfile, "r"); + if(stream == NULL) { + int rc = errno; + + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + free(loadfile); loadfile = NULL; + return FALSE; + } + + if(fgets(buffer, sizeof(buffer), stream)) { + char *comm = calloc(1, 256); + char state = 0; + int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0; + unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0; + + rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", + &pid, comm, &state, + &ppid, &pgrp, &session, &tty_nr, &tpgid, + &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime); + free(comm); + + if(rc != 15) { + crm_err("Only %d of 15 fields found in %s", rc, loadfile); + fclose(stream); + return FALSE; + + } else if(last_call > 0 + && last_call < now + && last_utime <= utime + && last_stime <= stime) { + + time_t elapsed = now - last_call; + unsigned long delta_utime = utime - last_utime; + unsigned long delta_stime = stime - last_stime; + + *load = (delta_utime + delta_stime); /* Cast to a float before division */ + *load /= ticks_per_s; + *load /= elapsed; + crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed); + + } else { + crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s); + } + + last_call = now; + last_utime = utime; + last_stime = stime; + + fclose(stream); + return TRUE; + } + + fclose(stream); + return FALSE; +} + +static bool +throttle_load_avg(float *load) +{ + char buffer[256]; + FILE *stream = NULL; + const char *loadfile = "/proc/loadavg"; + + if(load == NULL) { + return FALSE; + } + + stream = fopen(loadfile, "r"); + if(stream == NULL) { + int rc = errno; + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + return FALSE; + } + + if(fgets(buffer, sizeof(buffer), stream)) { + char *nl = strstr(buffer, "\n"); + + /* Grab the 1-minute average, ignore the rest */ + *load = strtof(buffer, NULL); + if(nl) { nl[0] = 0; } + + fclose(stream); + return TRUE; + } + + fclose(stream); + return FALSE; +} + +/*! 
+ * \internal + * \brief Check a load value against throttling thresholds + * + * \param[in] load Load value to check + * \param[in] desc Description of metric (for logging) + * \param[in] thresholds Low/medium/high/extreme thresholds + * + * \return Throttle mode corresponding to load value + */ +static enum throttle_state_e +throttle_check_thresholds(float load, const char *desc, + const float thresholds[4]) +{ + if (load > thresholds[3]) { + crm_notice("Extreme %s detected: %f", desc, load); + return throttle_extreme; + + } else if (load > thresholds[2]) { + crm_notice("High %s detected: %f", desc, load); + return throttle_high; + + } else if (load > thresholds[1]) { + crm_info("Moderate %s detected: %f", desc, load); + return throttle_med; + + } else if (load > thresholds[0]) { + crm_debug("Noticeable %s detected: %f", desc, load); + return throttle_low; + } + + crm_trace("Negligible %s detected: %f", desc, load); + return throttle_none; +} + +static enum throttle_state_e +throttle_handle_load(float load, const char *desc, int cores) +{ + float normalize; + float thresholds[4]; + + if (cores == 1) { + /* On a single core machine, a load of 1.0 is already too high */ + normalize = 0.6; + + } else { + /* Normalize the load to be per-core */ + normalize = cores; + } + thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; + thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; + thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; + thresholds[3] = load + 1.0; /* never extreme */ + + return throttle_check_thresholds(load, desc, thresholds); +} +#endif // HAVE_LINUX_PROCFS + +static enum throttle_state_e +throttle_mode(void) +{ + enum throttle_state_e mode = throttle_none; + +#if HAVE_LINUX_PROCFS + unsigned int cores; + float load; + float thresholds[4]; + + cores = pcmk__procfs_num_cores(); + if(throttle_cib_load(&load)) { + float cib_max_cpu = 0.95; + + /* The CIB is a single-threaded task and thus cannot consume + * more than 100% of a CPU (and 1/cores of the overall system + * load). + * + * On a many-cored system, the CIB might therefore be maxed out + * (causing operations to fail or appear to fail) even though + * the overall system load is still reasonable. + * + * Therefore, the 'normal' thresholds can not apply here, and we + * need a special case. 
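+         *
+         * For example, with several cores and a load-threshold of 80%,
+         * cib_max_cpu becomes 0.8, so CIB load is treated as low above
+         * 0.64, medium above 0.72, high above 0.8, and extreme above 1.2.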
+ */ + if(cores == 1) { + cib_max_cpu = 0.4; + } + if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) { + cib_max_cpu = throttle_load_target; + } + + thresholds[0] = cib_max_cpu * 0.8; + thresholds[1] = cib_max_cpu * 0.9; + thresholds[2] = cib_max_cpu; + /* Can only happen on machines with a low number of cores */ + thresholds[3] = cib_max_cpu * 1.5; + + mode = throttle_check_thresholds(load, "CIB load", thresholds); + } + + if(throttle_load_target <= 0) { + /* If we ever make this a valid value, the cluster will at least behave as expected */ + return mode; + } + + if(throttle_load_avg(&load)) { + enum throttle_state_e cpu_load; + + cpu_load = throttle_handle_load(load, "CPU load", cores); + if (cpu_load > mode) { + mode = cpu_load; + } + crm_debug("Current load is %f across %u core(s)", load, cores); + } +#endif // HAVE_LINUX_PROCFS + return mode; +} + +static void +throttle_send_command(enum throttle_state_e mode) +{ + xmlNode *xml = NULL; + static enum throttle_state_e last = -1; + + if(mode != last) { + crm_info("New throttle mode: %s load (was %s)", + load2str(mode), load2str(last)); + last = mode; + + xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode); + crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max); + + send_cluster_message(NULL, crm_msg_crmd, xml, TRUE); + free_xml(xml); + } +} + +static gboolean +throttle_timer_cb(gpointer data) +{ + throttle_send_command(throttle_mode()); + return TRUE; +} + +static void +throttle_record_free(gpointer p) +{ + struct throttle_record_s *r = p; + free(r->node); + free(r); +} + +static void +throttle_set_load_target(float target) +{ + throttle_load_target = target; +} + +/*! + * \internal + * \brief Update the maximum number of simultaneous jobs + * + * \param[in] preference Cluster-wide node-action-limit from the CIB + */ +static void +throttle_update_job_max(const char *preference) +{ + long long max = 0LL; + const char *env_limit = getenv("PCMK_node_action_limit"); + + if (env_limit != NULL) { + preference = env_limit; // Per-node override + } + if (preference != NULL) { + pcmk__scan_ll(preference, &max, 0LL); + } + if (max > 0) { + throttle_job_max = (int) max; + } else { + // Default is based on the number of cores detected + throttle_job_max = 2 * pcmk__procfs_num_cores(); + } +} + +void +throttle_init(void) +{ + if(throttle_records == NULL) { + throttle_records = pcmk__strkey_table(NULL, throttle_record_free); + throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); + } + + throttle_update_job_max(NULL); + mainloop_timer_start(throttle_timer); +} + +/*! 
+ * \internal + * \brief Configure throttle options based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_throttle(GHashTable *options) +{ + const char *value = g_hash_table_lookup(options, "load-threshold"); + + if (value != NULL) { + throttle_set_load_target(strtof(value, NULL) / 100.0); + } + + value = g_hash_table_lookup(options, "node-action-limit"); + throttle_update_job_max(value); +} + +void +throttle_fini(void) +{ + if (throttle_timer != NULL) { + mainloop_timer_del(throttle_timer); + throttle_timer = NULL; + } + if (throttle_records != NULL) { + g_hash_table_destroy(throttle_records); + throttle_records = NULL; + } +} + +int +throttle_get_total_job_limit(int l) +{ + /* Cluster-wide limit */ + GHashTableIter iter; + int limit = l; + int peers = crm_active_peers(); + struct throttle_record_s *r = NULL; + + g_hash_table_iter_init(&iter, throttle_records); + + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { + switch(r->mode) { + + case throttle_extreme: + if(limit == 0 || limit > peers/4) { + limit = QB_MAX(1, peers/4); + } + break; + + case throttle_high: + if(limit == 0 || limit > peers/2) { + limit = QB_MAX(1, peers/2); + } + break; + default: + break; + } + } + if(limit == l) { + /* crm_trace("No change to batch-limit=%d", limit); */ + + } else if(l == 0) { + crm_trace("Using batch-limit=%d", limit); + + } else { + crm_trace("Using batch-limit=%d instead of %d", limit, l); + } + return limit; +} + +int +throttle_get_job_limit(const char *node) +{ + int jobs = 1; + struct throttle_record_s *r = NULL; + + r = g_hash_table_lookup(throttle_records, node); + if(r == NULL) { + r = calloc(1, sizeof(struct throttle_record_s)); + r->node = strdup(node); + r->mode = throttle_low; + r->max = throttle_job_max; + crm_trace("Defaulting to local values for unknown node %s", node); + + g_hash_table_insert(throttle_records, r->node, r); + } + + switch(r->mode) { + case throttle_extreme: + case throttle_high: + jobs = 1; /* At least one job must always be allowed */ + break; + case throttle_med: + jobs = QB_MAX(1, r->max / 4); + break; + case throttle_low: + jobs = QB_MAX(1, r->max / 2); + break; + case throttle_none: + jobs = QB_MAX(1, r->max); + break; + default: + crm_err("Unknown throttle mode %.4x on %s", r->mode, node); + break; + } + return jobs; +} + +void +throttle_update(xmlNode *xml) +{ + int max = 0; + int mode = 0; + struct throttle_record_s *r = NULL; + const char *from = crm_element_value(xml, F_CRM_HOST_FROM); + + crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode); + crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max); + + r = g_hash_table_lookup(throttle_records, from); + + if(r == NULL) { + r = calloc(1, sizeof(struct throttle_record_s)); + r->node = strdup(from); + g_hash_table_insert(throttle_records, r->node, r); + } + + r->max = max; + r->mode = (enum throttle_state_e) mode; + + crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", + from, load2str((enum throttle_state_e) mode), max, + throttle_get_job_limit(from)); +} diff --git a/daemons/controld/controld_throttle.h b/daemons/controld/controld_throttle.h new file mode 100644 index 0000000..a798c6c --- /dev/null +++ b/daemons/controld/controld_throttle.h @@ -0,0 +1,16 @@ +/* + * Copyright 2013-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+void throttle_init(void);
+void throttle_fini(void);
+void controld_configure_throttle(GHashTable *options);
+
+void throttle_update(xmlNode *xml);
+int throttle_get_job_limit(const char *node);
+int throttle_get_total_job_limit(int l);
diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c
new file mode 100644
index 0000000..a65bef5
--- /dev/null
+++ b/daemons/controld/controld_timers.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <time.h>
+#include <stdlib.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-controld.h>
+
+//! FSA mainloop timer type
+typedef struct fsa_timer_s {
+    guint source_id;                        //!< Timer source ID
+    guint period_ms;                        //!< Timer period
+    enum crmd_fsa_input fsa_input;          //!< Input to register if timer pops
+    gboolean (*callback) (gpointer data);   //!< What to do if timer pops
+    bool log_error;                         //!< Timer popping indicates error
+    int counter;                            //!< For detecting loops
+} fsa_timer_t;
+
+//! Wait before retrying a failed cib or executor connection
+static fsa_timer_t *wait_timer = NULL;
+
+//! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
+static fsa_timer_t *recheck_timer = NULL;
+
+//! Wait at start-up, or after an election, for DC to make contact
+static fsa_timer_t *election_timer = NULL;
+
+//! Delay start of a new transition, with the expectation that something else might happen
+static fsa_timer_t *transition_timer = NULL;
+
+//! join-integration-timeout
+static fsa_timer_t *integration_timer = NULL;
+
+//! join-finalization-timeout
+static fsa_timer_t *finalization_timer = NULL;
+
+//! Wait for DC to stop all resources and give us the all-clear to shut down
+fsa_timer_t *shutdown_escalation_timer = NULL;
+
+//! Cluster recheck interval (from configuration)
+static guint recheck_interval_ms = 0;
+
+static const char *
+get_timer_desc(fsa_timer_t * timer)
+{
+    if (timer == election_timer) {
+        return "Election Trigger";
+
+    } else if (timer == shutdown_escalation_timer) {
+        return "Shutdown Escalation";
+
+    } else if (timer == integration_timer) {
+        return "Integration Timer";
+
+    } else if (timer == finalization_timer) {
+        return "Finalization Timer";
+
+    } else if (timer == transition_timer) {
+        return "New Transition Timer";
+
+    } else if (timer == wait_timer) {
+        return "Wait Timer";
+
+    } else if (timer == recheck_timer) {
+        return "Cluster Recheck Timer";
+
+    }
+    return "Unknown Timer";
+}
+
+/*! 
+ * \internal + * \brief Stop an FSA timer + * + * \param[in,out] timer Timer to stop + * + * \return true if the timer was running, or false otherwise + */ +static bool +controld_stop_timer(fsa_timer_t *timer) +{ + CRM_CHECK(timer != NULL, return false); + + if (timer->source_id != 0) { + crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + g_source_remove(timer->source_id); + timer->source_id = 0; + + } else { + crm_trace("%s already stopped (would inject %s if popped after %ums)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms); + return false; + } + return true; +} + +/*! + * \internal + * \brief Start an FSA timer + * + * \param[in,out] timer Timer to start + */ +static void +controld_start_timer(fsa_timer_t *timer) +{ + if (timer->source_id == 0 && timer->period_ms > 0) { + timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer); + CRM_ASSERT(timer->source_id != 0); + crm_debug("Started %s (inject %s if pops after %ums, source=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + } else { + crm_debug("%s already running (inject %s if pops after %ums, source=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + } +} + +/* A_DC_TIMER_STOP, A_DC_TIMER_START, + * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START + * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START + */ +void +do_timer_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + gboolean timer_op_ok = TRUE; + + if (action & A_DC_TIMER_STOP) { + timer_op_ok = controld_stop_timer(election_timer); + + } else if (action & A_FINALIZE_TIMER_STOP) { + timer_op_ok = controld_stop_timer(finalization_timer); + + } else if (action & A_INTEGRATE_TIMER_STOP) { + timer_op_ok = controld_stop_timer(integration_timer); + } + + /* don't start a timer that wasn't already running */ + if (action & A_DC_TIMER_START && timer_op_ok) { + controld_start_timer(election_timer); + if (AM_I_DC) { + /* there can be only one */ + register_fsa_input(cause, I_ELECTION, NULL); + } + + } else if (action & A_FINALIZE_TIMER_START) { + controld_start_timer(finalization_timer); + + } else if (action & A_INTEGRATE_TIMER_START) { + controld_start_timer(integration_timer); + } +} + +static gboolean +crm_timer_popped(gpointer data) +{ + fsa_timer_t *timer = (fsa_timer_t *) data; + + if (timer->log_error) { + crm_err("%s just popped in state %s! 
" CRM_XS " input=%s time=%ums", + get_timer_desc(timer), + fsa_state2string(controld_globals.fsa_state), + fsa_input2string(timer->fsa_input), timer->period_ms); + } else { + crm_info("%s just popped " CRM_XS " input=%s time=%ums", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms); + timer->counter++; + } + + if ((timer == election_timer) && (election_timer->counter > 5)) { + crm_notice("We appear to be in an election loop, something may be wrong"); + crm_write_blackbox(0, NULL); + election_timer->counter = 0; + } + + controld_stop_timer(timer); // Make timer _not_ go off again + + if (timer->fsa_input == I_INTEGRATED) { + crm_info("Welcomed: %d, Integrated: %d", + crmd_join_phase_count(crm_join_welcomed), + crmd_join_phase_count(crm_join_integrated)); + if (crmd_join_phase_count(crm_join_welcomed) == 0) { + // If we don't even have ourselves, start again + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, + __func__); + + } else { + register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); + } + + } else if ((timer == recheck_timer) + && (controld_globals.fsa_state != S_IDLE)) { + crm_debug("Discarding %s event in state: %s", + fsa_input2string(timer->fsa_input), + fsa_state2string(controld_globals.fsa_state)); + + } else if ((timer == finalization_timer) + && (controld_globals.fsa_state != S_FINALIZE_JOIN)) { + crm_debug("Discarding %s event in state: %s", + fsa_input2string(timer->fsa_input), + fsa_state2string(controld_globals.fsa_state)); + + } else if (timer->fsa_input != I_NULL) { + register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); + } + + controld_trigger_fsa(); + + return TRUE; +} + +bool +controld_init_fsa_timers(void) +{ + transition_timer = calloc(1, sizeof(fsa_timer_t)); + if (transition_timer == NULL) { + return FALSE; + } + + integration_timer = calloc(1, sizeof(fsa_timer_t)); + if (integration_timer == NULL) { + return FALSE; + } + + finalization_timer = calloc(1, sizeof(fsa_timer_t)); + if (finalization_timer == NULL) { + return FALSE; + } + + election_timer = calloc(1, sizeof(fsa_timer_t)); + if (election_timer == NULL) { + return FALSE; + } + + shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); + if (shutdown_escalation_timer == NULL) { + return FALSE; + } + + wait_timer = calloc(1, sizeof(fsa_timer_t)); + if (wait_timer == NULL) { + return FALSE; + } + + recheck_timer = calloc(1, sizeof(fsa_timer_t)); + if (recheck_timer == NULL) { + return FALSE; + } + + election_timer->source_id = 0; + election_timer->period_ms = 0; + election_timer->fsa_input = I_DC_TIMEOUT; + election_timer->callback = crm_timer_popped; + election_timer->log_error = FALSE; + + transition_timer->source_id = 0; + transition_timer->period_ms = 0; + transition_timer->fsa_input = I_PE_CALC; + transition_timer->callback = crm_timer_popped; + transition_timer->log_error = FALSE; + + integration_timer->source_id = 0; + integration_timer->period_ms = 0; + integration_timer->fsa_input = I_INTEGRATED; + integration_timer->callback = crm_timer_popped; + integration_timer->log_error = TRUE; + + finalization_timer->source_id = 0; + finalization_timer->period_ms = 0; + finalization_timer->fsa_input = I_FINALIZED; + finalization_timer->callback = crm_timer_popped; + finalization_timer->log_error = FALSE; + + /* We can't use I_FINALIZED here, because that creates a bug in the join + * process where a joining node can be stuck in S_PENDING while we think it + * is in S_NOT_DC. 
This created an infinite transition loop in which we + * continually send probes which the node NACKs because it's pending. + * + * If we have nodes where the cluster layer is active but the controller is + * not, we can avoid this causing an election/join loop, in the integration + * phase. + */ + finalization_timer->fsa_input = I_ELECTION; + + shutdown_escalation_timer->source_id = 0; + shutdown_escalation_timer->period_ms = 0; + shutdown_escalation_timer->fsa_input = I_STOP; + shutdown_escalation_timer->callback = crm_timer_popped; + shutdown_escalation_timer->log_error = TRUE; + + wait_timer->source_id = 0; + wait_timer->period_ms = 2000; + wait_timer->fsa_input = I_NULL; + wait_timer->callback = crm_timer_popped; + wait_timer->log_error = FALSE; + + recheck_timer->source_id = 0; + recheck_timer->period_ms = 0; + recheck_timer->fsa_input = I_PE_CALC; + recheck_timer->callback = crm_timer_popped; + recheck_timer->log_error = FALSE; + + return TRUE; +} + +/*! + * \internal + * \brief Configure timers based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_fsa_timers(GHashTable *options) +{ + const char *value = NULL; + + // Election timer + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_DC_DEADTIME); + election_timer->period_ms = crm_parse_interval_spec(value); + + // Integration timer + value = g_hash_table_lookup(options, "join-integration-timeout"); + integration_timer->period_ms = crm_parse_interval_spec(value); + + // Finalization timer + value = g_hash_table_lookup(options, "join-finalization-timeout"); + finalization_timer->period_ms = crm_parse_interval_spec(value); + + // Shutdown escalation timer + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FORCE_QUIT); + shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value); + crm_debug("Shutdown escalation occurs if DC has not responded to request " + "in %ums", shutdown_escalation_timer->period_ms); + + // Transition timer + value = g_hash_table_lookup(options, "transition-delay"); + transition_timer->period_ms = crm_parse_interval_spec(value); + + // Recheck interval + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_RECHECK); + recheck_interval_ms = crm_parse_interval_spec(value); + crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms); +} + +void +controld_free_fsa_timers(void) +{ + controld_stop_timer(transition_timer); + controld_stop_timer(integration_timer); + controld_stop_timer(finalization_timer); + controld_stop_timer(election_timer); + controld_stop_timer(shutdown_escalation_timer); + controld_stop_timer(wait_timer); + controld_stop_timer(recheck_timer); + + free(transition_timer); transition_timer = NULL; + free(integration_timer); integration_timer = NULL; + free(finalization_timer); finalization_timer = NULL; + free(election_timer); election_timer = NULL; + free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; + free(wait_timer); wait_timer = NULL; + free(recheck_timer); recheck_timer = NULL; +} + +/*! + * \internal + * \brief Check whether the transition timer is started + * \return true if the transition timer is started, or false otherwise + */ +bool +controld_is_started_transition_timer(void) +{ + return (transition_timer->period_ms > 0) + && (transition_timer->source_id != 0); +} + +/*! 
+ * \internal + * \brief Start the recheck timer + */ +void +controld_start_recheck_timer(void) +{ + // Default to recheck interval configured in CIB (if any) + guint period_ms = recheck_interval_ms; + + // If scheduler supplied a "recheck by" time, check whether that's sooner + if (controld_globals.transition_graph->recheck_by > 0) { + time_t diff_seconds = controld_globals.transition_graph->recheck_by + - time(NULL); + + if (diff_seconds < 1) { + // We're already past the desired time + period_ms = 500; + } else { + period_ms = (guint) diff_seconds * 1000; + } + + // Use "recheck by" only if it's sooner than interval from CIB + if (period_ms > recheck_interval_ms) { + period_ms = recheck_interval_ms; + } + } + + if (period_ms > 0) { + recheck_timer->period_ms = period_ms; + controld_start_timer(recheck_timer); + } +} + +/*! + * \internal + * \brief Start the wait timer + */ +void +controld_start_wait_timer(void) +{ + controld_start_timer(wait_timer); +} + +/*! + * \internal + * \brief Stop the recheck timer + * + * \return true if the recheck timer was running, or false otherwise + */ +bool +controld_stop_recheck_timer(void) +{ + return controld_stop_timer(recheck_timer); +} + +/*! + * \brief Get the transition timer's configured period + * \return The transition_timer's period + */ +guint +controld_get_period_transition_timer(void) +{ + return transition_timer->period_ms; +} + +/*! + * \internal + * \brief Reset the election timer's counter to 0 + */ +void +controld_reset_counter_election_timer(void) +{ + election_timer->counter = 0; +} + +/*! + * \internal + * \brief Stop the transition timer + * + * \return true if the transition timer was running, or false otherwise + */ +bool +controld_stop_transition_timer(void) +{ + return controld_stop_timer(transition_timer); +} + +/*! + * \internal + * \brief Start the transition timer + */ +void +controld_start_transition_timer(void) +{ + controld_start_timer(transition_timer); +} + +/*! + * \internal + * \brief Start the countdown sequence for a shutdown + * + * \param[in] default_period_ms Period to use if the shutdown escalation + * timer's period is 0 + */ +void +controld_shutdown_start_countdown(guint default_period_ms) +{ + if (shutdown_escalation_timer->period_ms == 0) { + shutdown_escalation_timer->period_ms = default_period_ms; + } + + crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums", + shutdown_escalation_timer->period_ms); + controld_start_timer(shutdown_escalation_timer); +} diff --git a/daemons/controld/controld_timers.h b/daemons/controld/controld_timers.h new file mode 100644 index 0000000..587f4d1 --- /dev/null +++ b/daemons/controld/controld_timers.h @@ -0,0 +1,36 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CONTROLD_TIMERS__H +# define CONTROLD_TIMERS__H + +# include <stdbool.h> // bool +# include <glib.h> // gboolean, gpointer, guint +# include <controld_fsa.h> // crmd_fsa_input + +bool controld_init_fsa_timers(void); +void controld_free_fsa_timers(void); +void controld_configure_fsa_timers(GHashTable *options); + +bool controld_stop_recheck_timer(void); +bool controld_stop_transition_timer(void); + +void controld_start_recheck_timer(void); +void controld_start_transition_timer(void); +void controld_start_wait_timer(void); + +bool controld_is_started_transition_timer(void); + +guint controld_get_period_transition_timer(void); + +void controld_reset_counter_election_timer(void); + +void controld_shutdown_start_countdown(guint default_period_ms); + +#endif diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c new file mode 100644 index 0000000..c8a342c --- /dev/null +++ b/daemons/controld/controld_transition.c @@ -0,0 +1,197 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +static void +global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output) +{ +} + +static pcmk__graph_t * +create_blank_graph(void) +{ + pcmk__graph_t *a_graph = pcmk__unpack_graph(NULL, NULL); + + a_graph->complete = true; + a_graph->abort_reason = "DC Takeover"; + a_graph->completion_action = pcmk__graph_restart; + return a_graph; +} + +/* A_TE_START, A_TE_STOP, O_TE_RESTART */ +void +do_te_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + gboolean init_ok = TRUE; + + if (pcmk_is_set(action, A_TE_STOP)) { + pcmk__free_graph(controld_globals.transition_graph); + controld_globals.transition_graph = NULL; + + if (cib_conn != NULL) { + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + te_update_diff); + } + + controld_clear_fsa_input_flags(R_TE_CONNECTED); + crm_info("Transitioner is now inactive"); + } + + if ((action & A_TE_START) == 0) { + return; + + } else if (pcmk_is_set(controld_globals.fsa_input_register, + R_TE_CONNECTED)) { + crm_debug("The transitioner is already active"); + return; + + } else if ((action & A_TE_START) && cur_state == S_STOPPING) { + crm_info("Ignoring request to start the transitioner while shutting down"); + return; + } + + if (controld_globals.te_uuid == NULL) { + controld_globals.te_uuid = crm_generate_uuid(); + crm_info("Registering TE UUID: %s", controld_globals.te_uuid); + } + + if (cib_conn == NULL) { + crm_err("Could not set CIB callbacks"); + init_ok = FALSE; + + } else { + if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + te_update_diff) != pcmk_ok) { + crm_err("Could not set CIB notification callback"); + init_ok = FALSE; + } + + if (cib_conn->cmds->set_op_callback(cib_conn, + global_cib_callback) != pcmk_ok) { + crm_err("Could not set CIB global callback"); + init_ok = FALSE; + } + } + + if (init_ok) { + controld_register_graph_functions(); + pcmk__free_graph(controld_globals.transition_graph); + + /* create a blank one */ + crm_debug("Transitioner is now active"); 
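+ /* The "blank" graph from create_blank_graph() above is already marked + * complete, with abort reason "DC Takeover" and a restart completion + * action, so the first real scheduler output replaces it immediately. + */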
+ controld_globals.transition_graph = create_blank_graph(); + controld_set_fsa_input_flags(R_TE_CONNECTED); + } +} + +/* A_TE_INVOKE, A_TE_CANCEL */ +void +do_te_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + + if (!AM_I_DC + || ((controld_globals.fsa_state != S_TRANSITION_ENGINE) + && pcmk_is_set(action, A_TE_INVOKE))) { + crm_notice("No need to invoke the TE (%s) in state %s", + fsa_action2string(action), + fsa_state2string(controld_globals.fsa_state)); + return; + } + + if (action & A_TE_CANCEL) { + crm_debug("Cancelling the transition: %sactive", + controld_globals.transition_graph->complete? "in" : ""); + abort_transition(INFINITY, pcmk__graph_restart, "Peer Cancelled", NULL); + if (!controld_globals.transition_graph->complete) { + crmd_fsa_stall(FALSE); + } + + } else if (action & A_TE_HALT) { + abort_transition(INFINITY, pcmk__graph_wait, "Peer Halt", NULL); + if (!controld_globals.transition_graph->complete) { + crmd_fsa_stall(FALSE); + } + + } else if (action & A_TE_INVOKE) { + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + xmlNode *graph_data = input->xml; + const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); + const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); + const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); + + if (graph_file == NULL && graph_data == NULL) { + crm_log_xml_err(input->msg, "Bad command"); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + return; + } + + if (!controld_globals.transition_graph->complete) { + crm_info("Another transition is already active"); + abort_transition(INFINITY, pcmk__graph_restart, "Transition Active", + NULL); + return; + } + + if ((controld_globals.fsa_pe_ref == NULL) + || !pcmk__str_eq(controld_globals.fsa_pe_ref, ref, + pcmk__str_none)) { + crm_info("Transition is redundant: %s expected but %s received", + pcmk__s(controld_globals.fsa_pe_ref, "no reference"), + pcmk__s(ref, "no reference")); + abort_transition(INFINITY, pcmk__graph_restart, + "Transition Redundant", NULL); + } + + if (graph_data == NULL && graph_file != NULL) { + graph_data = filename2xml(graph_file); + } + + if (controld_is_started_transition_timer()) { + crm_debug("The transitioner is waiting for the transition timer"); + return; + } + + CRM_CHECK(graph_data != NULL, + crm_err("Input raised by %s is invalid", msg_data->origin); + crm_log_xml_err(input->msg, "Bad command"); + return); + + pcmk__free_graph(controld_globals.transition_graph); + controld_globals.transition_graph = pcmk__unpack_graph(graph_data, + graph_input); + CRM_CHECK(controld_globals.transition_graph != NULL, + controld_globals.transition_graph = create_blank_graph(); + return); + crm_info("Processing graph %d (ref=%s) derived from %s", + controld_globals.transition_graph->id, ref, graph_input); + + te_reset_job_counts(); + + trigger_graph(); + pcmk__log_graph(LOG_TRACE, controld_globals.transition_graph); + + if (graph_data != input->xml) { + free_xml(graph_data); + } + } +} diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h new file mode 100644 index 0000000..2da4221 --- /dev/null +++ b/daemons/controld/controld_transition.h @@ -0,0 +1,63 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */ + +#ifndef TENGINE__H +# define TENGINE__H + +# include <crm/common/mainloop.h> +# include <crm/stonith-ng.h> +# include <crm/services.h> +# include <pacemaker-internal.h> + +/* tengine */ +pcmk__graph_action_t *match_down_event(const char *target); +pcmk__graph_action_t *get_cancel_action(const char *id, const char *node); +bool confirm_cancel_action(const char *id, const char *node_id); + +void controld_record_action_timeout(pcmk__graph_action_t *action); + +void controld_destroy_outside_events_table(void); +void controld_remove_all_outside_events(void); + +gboolean fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node); +void process_graph_event(xmlNode *event, const char *event_node); + +/* utils */ +pcmk__graph_action_t *controld_get_action(int id); +gboolean stop_te_timer(pcmk__graph_action_t *action); +const char *get_rsc_state(const char *task, enum pcmk_exec_status status); + +void process_te_message(xmlNode *msg, xmlNode *xml_data); + +void controld_register_graph_functions(void); + +void notify_crmd(pcmk__graph_t * graph); + +void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data); +gboolean action_timer_callback(gpointer data); +void te_update_diff(const char *event, xmlNode *msg); + +void controld_init_transition_trigger(void); +void controld_destroy_transition_trigger(void); + +void controld_trigger_graph_as(const char *fn, int line); +void abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, guint delay_ms); +void abort_transition_graph(int abort_priority, + enum pcmk__graph_next abort_action, + const char *abort_text, const xmlNode *reason, + const char *fn, int line); + +# define trigger_graph() controld_trigger_graph_as(__func__, __LINE__) +# define abort_transition(pri, action, text, reason) \ + abort_transition_graph(pri, action, text, reason,__func__,__LINE__); + +void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph); +void te_reset_job_counts(void); + +#endif diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c new file mode 100644 index 0000000..4ce09d9 --- /dev/null +++ b/daemons/controld/controld_utils.c @@ -0,0 +1,837 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <stdlib.h> +#include <stdint.h> // uint64_t + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +const char * +fsa_input2string(enum crmd_fsa_input input) +{ + const char *inputAsText = NULL; + + switch (input) { + case I_NULL: + inputAsText = "I_NULL"; + break; + case I_CIB_OP: + inputAsText = "I_CIB_OP (unused)"; + break; + case I_CIB_UPDATE: + inputAsText = "I_CIB_UPDATE"; + break; + case I_DC_TIMEOUT: + inputAsText = "I_DC_TIMEOUT"; + break; + case I_ELECTION: + inputAsText = "I_ELECTION"; + break; + case I_PE_CALC: + inputAsText = "I_PE_CALC"; + break; + case I_RELEASE_DC: + inputAsText = "I_RELEASE_DC"; + break; + case I_ELECTION_DC: + inputAsText = "I_ELECTION_DC"; + break; + case I_ERROR: + inputAsText = "I_ERROR"; + break; + case I_FAIL: + inputAsText = "I_FAIL"; + break; + case I_INTEGRATED: + inputAsText = "I_INTEGRATED"; + break; + case I_FINALIZED: + inputAsText = "I_FINALIZED"; + break; + case I_NODE_JOIN: + inputAsText = "I_NODE_JOIN"; + break; + case I_JOIN_OFFER: + inputAsText = "I_JOIN_OFFER"; + break; + case I_JOIN_REQUEST: + inputAsText = "I_JOIN_REQUEST"; + break; + case I_JOIN_RESULT: + inputAsText = "I_JOIN_RESULT"; + break; + case I_NOT_DC: + inputAsText = "I_NOT_DC"; + break; + case I_RECOVERED: + inputAsText = "I_RECOVERED"; + break; + case I_RELEASE_FAIL: + inputAsText = "I_RELEASE_FAIL"; + break; + case I_RELEASE_SUCCESS: + inputAsText = "I_RELEASE_SUCCESS"; + break; + case I_RESTART: + inputAsText = "I_RESTART"; + break; + case I_PE_SUCCESS: + inputAsText = "I_PE_SUCCESS"; + break; + case I_ROUTER: + inputAsText = "I_ROUTER"; + break; + case I_SHUTDOWN: + inputAsText = "I_SHUTDOWN"; + break; + case I_STARTUP: + inputAsText = "I_STARTUP"; + break; + case I_TE_SUCCESS: + inputAsText = "I_TE_SUCCESS"; + break; + case I_STOP: + inputAsText = "I_STOP"; + break; + case I_DC_HEARTBEAT: + inputAsText = "I_DC_HEARTBEAT"; + break; + case I_WAIT_FOR_EVENT: + inputAsText = "I_WAIT_FOR_EVENT"; + break; + case I_LRM_EVENT: + inputAsText = "I_LRM_EVENT"; + break; + case I_PENDING: + inputAsText = "I_PENDING"; + break; + case I_HALT: + inputAsText = "I_HALT"; + break; + case I_TERMINATE: + inputAsText = "I_TERMINATE"; + break; + case I_ILLEGAL: + inputAsText = "I_ILLEGAL"; + break; + } + + if (inputAsText == NULL) { + crm_err("Input %d is unknown", input); + inputAsText = "<UNKNOWN_INPUT>"; + } + + return inputAsText; +} + +const char * +fsa_state2string(enum crmd_fsa_state state) +{ + const char *stateAsText = NULL; + + switch (state) { + case S_IDLE: + stateAsText = "S_IDLE"; + break; + case S_ELECTION: + stateAsText = "S_ELECTION"; + break; + case S_INTEGRATION: + stateAsText = "S_INTEGRATION"; + break; + case S_FINALIZE_JOIN: + stateAsText = "S_FINALIZE_JOIN"; + break; + case S_NOT_DC: + stateAsText = "S_NOT_DC"; + break; + case S_POLICY_ENGINE: + stateAsText = "S_POLICY_ENGINE"; + break; + case S_RECOVERY: + stateAsText = "S_RECOVERY"; + break; + case S_RELEASE_DC: + stateAsText = "S_RELEASE_DC"; + break; + case S_PENDING: + stateAsText = "S_PENDING"; + break; + case S_STOPPING: + stateAsText = "S_STOPPING"; + break; + case S_TERMINATE: + stateAsText = "S_TERMINATE"; + break; + case S_TRANSITION_ENGINE: + stateAsText = "S_TRANSITION_ENGINE"; + break; + case S_STARTING: + stateAsText = "S_STARTING"; + break; + case S_HALT: + stateAsText = "S_HALT"; + break; + case S_ILLEGAL: + stateAsText = "S_ILLEGAL"; + break; + } + + if (stateAsText == 
NULL) { + crm_err("State %d is unknown", state); + stateAsText = "<UNKNOWN_STATE>"; + } + + return stateAsText; +} + +const char * +fsa_cause2string(enum crmd_fsa_cause cause) +{ + const char *causeAsText = NULL; + + switch (cause) { + case C_UNKNOWN: + causeAsText = "C_UNKNOWN"; + break; + case C_STARTUP: + causeAsText = "C_STARTUP"; + break; + case C_IPC_MESSAGE: + causeAsText = "C_IPC_MESSAGE"; + break; + case C_HA_MESSAGE: + causeAsText = "C_HA_MESSAGE"; + break; + case C_TIMER_POPPED: + causeAsText = "C_TIMER_POPPED"; + break; + case C_SHUTDOWN: + causeAsText = "C_SHUTDOWN"; + break; + case C_LRM_OP_CALLBACK: + causeAsText = "C_LRM_OP_CALLBACK"; + break; + case C_CRMD_STATUS_CALLBACK: + causeAsText = "C_CRMD_STATUS_CALLBACK"; + break; + case C_FSA_INTERNAL: + causeAsText = "C_FSA_INTERNAL"; + break; + } + + if (causeAsText == NULL) { + crm_err("Cause %d is unknown", cause); + causeAsText = "<UNKNOWN_CAUSE>"; + } + + return causeAsText; +} + +const char * +fsa_action2string(long long action) +{ + const char *actionAsText = NULL; + + switch (action) { + + case A_NOTHING: + actionAsText = "A_NOTHING"; + break; + case A_ELECTION_START: + actionAsText = "A_ELECTION_START"; + break; + case A_DC_JOIN_FINAL: + actionAsText = "A_DC_JOIN_FINAL"; + break; + case A_READCONFIG: + actionAsText = "A_READCONFIG"; + break; + case O_RELEASE: + actionAsText = "O_RELEASE"; + break; + case A_STARTUP: + actionAsText = "A_STARTUP"; + break; + case A_STARTED: + actionAsText = "A_STARTED"; + break; + case A_HA_CONNECT: + actionAsText = "A_HA_CONNECT"; + break; + case A_HA_DISCONNECT: + actionAsText = "A_HA_DISCONNECT"; + break; + case A_LRM_CONNECT: + actionAsText = "A_LRM_CONNECT"; + break; + case A_LRM_EVENT: + actionAsText = "A_LRM_EVENT"; + break; + case A_LRM_INVOKE: + actionAsText = "A_LRM_INVOKE"; + break; + case A_LRM_DISCONNECT: + actionAsText = "A_LRM_DISCONNECT"; + break; + case O_LRM_RECONNECT: + actionAsText = "O_LRM_RECONNECT"; + break; + case A_CL_JOIN_QUERY: + actionAsText = "A_CL_JOIN_QUERY"; + break; + case A_DC_TIMER_STOP: + actionAsText = "A_DC_TIMER_STOP"; + break; + case A_DC_TIMER_START: + actionAsText = "A_DC_TIMER_START"; + break; + case A_INTEGRATE_TIMER_START: + actionAsText = "A_INTEGRATE_TIMER_START"; + break; + case A_INTEGRATE_TIMER_STOP: + actionAsText = "A_INTEGRATE_TIMER_STOP"; + break; + case A_FINALIZE_TIMER_START: + actionAsText = "A_FINALIZE_TIMER_START"; + break; + case A_FINALIZE_TIMER_STOP: + actionAsText = "A_FINALIZE_TIMER_STOP"; + break; + case A_ELECTION_COUNT: + actionAsText = "A_ELECTION_COUNT"; + break; + case A_ELECTION_VOTE: + actionAsText = "A_ELECTION_VOTE"; + break; + case A_ELECTION_CHECK: + actionAsText = "A_ELECTION_CHECK"; + break; + case A_CL_JOIN_ANNOUNCE: + actionAsText = "A_CL_JOIN_ANNOUNCE"; + break; + case A_CL_JOIN_REQUEST: + actionAsText = "A_CL_JOIN_REQUEST"; + break; + case A_CL_JOIN_RESULT: + actionAsText = "A_CL_JOIN_RESULT"; + break; + case A_DC_JOIN_OFFER_ALL: + actionAsText = "A_DC_JOIN_OFFER_ALL"; + break; + case A_DC_JOIN_OFFER_ONE: + actionAsText = "A_DC_JOIN_OFFER_ONE"; + break; + case A_DC_JOIN_PROCESS_REQ: + actionAsText = "A_DC_JOIN_PROCESS_REQ"; + break; + case A_DC_JOIN_PROCESS_ACK: + actionAsText = "A_DC_JOIN_PROCESS_ACK"; + break; + case A_DC_JOIN_FINALIZE: + actionAsText = "A_DC_JOIN_FINALIZE"; + break; + case A_MSG_PROCESS: + actionAsText = "A_MSG_PROCESS"; + break; + case A_MSG_ROUTE: + actionAsText = "A_MSG_ROUTE"; + break; + case A_RECOVER: + actionAsText = "A_RECOVER"; + break; + case A_DC_RELEASE: + actionAsText = 
"A_DC_RELEASE"; + break; + case A_DC_RELEASED: + actionAsText = "A_DC_RELEASED"; + break; + case A_DC_TAKEOVER: + actionAsText = "A_DC_TAKEOVER"; + break; + case A_SHUTDOWN: + actionAsText = "A_SHUTDOWN"; + break; + case A_SHUTDOWN_REQ: + actionAsText = "A_SHUTDOWN_REQ"; + break; + case A_STOP: + actionAsText = "A_STOP "; + break; + case A_EXIT_0: + actionAsText = "A_EXIT_0"; + break; + case A_EXIT_1: + actionAsText = "A_EXIT_1"; + break; + case O_CIB_RESTART: + actionAsText = "O_CIB_RESTART"; + break; + case A_CIB_START: + actionAsText = "A_CIB_START"; + break; + case A_CIB_STOP: + actionAsText = "A_CIB_STOP"; + break; + case A_TE_INVOKE: + actionAsText = "A_TE_INVOKE"; + break; + case O_TE_RESTART: + actionAsText = "O_TE_RESTART"; + break; + case A_TE_START: + actionAsText = "A_TE_START"; + break; + case A_TE_STOP: + actionAsText = "A_TE_STOP"; + break; + case A_TE_HALT: + actionAsText = "A_TE_HALT"; + break; + case A_TE_CANCEL: + actionAsText = "A_TE_CANCEL"; + break; + case A_PE_INVOKE: + actionAsText = "A_PE_INVOKE"; + break; + case O_PE_RESTART: + actionAsText = "O_PE_RESTART"; + break; + case A_PE_START: + actionAsText = "A_PE_START"; + break; + case A_PE_STOP: + actionAsText = "A_PE_STOP"; + break; + case A_NODE_BLOCK: + actionAsText = "A_NODE_BLOCK"; + break; + case A_UPDATE_NODESTATUS: + actionAsText = "A_UPDATE_NODESTATUS"; + break; + case A_LOG: + actionAsText = "A_LOG "; + break; + case A_ERROR: + actionAsText = "A_ERROR "; + break; + case A_WARN: + actionAsText = "A_WARN "; + break; + /* Composite actions */ + case A_DC_TIMER_START | A_CL_JOIN_QUERY: + actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY"; + break; + } + + if (actionAsText == NULL) { + crm_err("Action %.16llx is unknown", action); + actionAsText = "<UNKNOWN_ACTION>"; + } + + return actionAsText; +} + +void +fsa_dump_inputs(int log_level, const char *text, long long input_register) +{ + if (input_register == A_NOTHING) { + return; + } + if (text == NULL) { + text = "Input register contents:"; + } + + if (pcmk_is_set(input_register, R_THE_DC)) { + crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC); + } + if (pcmk_is_set(input_register, R_STARTING)) { + crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING); + } + if (pcmk_is_set(input_register, R_SHUTDOWN)) { + crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); + } + if (pcmk_is_set(input_register, R_STAYDOWN)) { + crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); + } + if (pcmk_is_set(input_register, R_JOIN_OK)) { + crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); + } + if (pcmk_is_set(input_register, R_READ_CONFIG)) { + crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG); + } + if (pcmk_is_set(input_register, R_INVOKE_PE)) { + crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); + } + if (pcmk_is_set(input_register, R_CIB_CONNECTED)) { + crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); + } + if (pcmk_is_set(input_register, R_PE_CONNECTED)) { + crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); + } + if (pcmk_is_set(input_register, R_TE_CONNECTED)) { + crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); + } + if (pcmk_is_set(input_register, R_LRM_CONNECTED)) { + crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); + } + if (pcmk_is_set(input_register, R_CIB_REQUIRED)) { + crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); + } + if (pcmk_is_set(input_register, R_PE_REQUIRED)) { + crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); + } + if 
(pcmk_is_set(input_register, R_TE_REQUIRED)) { + crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); + } + if (pcmk_is_set(input_register, R_REQ_PEND)) { + crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND); + } + if (pcmk_is_set(input_register, R_PE_PEND)) { + crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND); + } + if (pcmk_is_set(input_register, R_TE_PEND)) { + crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND); + } + if (pcmk_is_set(input_register, R_RESP_PEND)) { + crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); + } + if (pcmk_is_set(input_register, R_CIB_DONE)) { + crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); + } + if (pcmk_is_set(input_register, R_HAVE_CIB)) { + crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); + } + if (pcmk_is_set(input_register, R_MEMBERSHIP)) { + crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP); + } + if (pcmk_is_set(input_register, R_PEER_DATA)) { + crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); + } + if (pcmk_is_set(input_register, R_IN_RECOVERY)) { + crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); + } +} + +void +fsa_dump_actions(uint64_t action, const char *text) +{ + if (pcmk_is_set(action, A_READCONFIG)) { + crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); + } + if (pcmk_is_set(action, A_STARTUP)) { + crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text); + } + if (pcmk_is_set(action, A_STARTED)) { + crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text); + } + if (pcmk_is_set(action, A_HA_CONNECT)) { + crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); + } + if (pcmk_is_set(action, A_HA_DISCONNECT)) { + crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); + } + if (pcmk_is_set(action, A_LRM_CONNECT)) { + crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); + } + if (pcmk_is_set(action, A_LRM_EVENT)) { + crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); + } + if (pcmk_is_set(action, A_LRM_INVOKE)) { + crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); + } + if (pcmk_is_set(action, A_LRM_DISCONNECT)) { + crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); + } + if (pcmk_is_set(action, A_DC_TIMER_STOP)) { + crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_DC_TIMER_START)) { + crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); + } + if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) { + crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); + } + if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) { + crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) { + crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); + } + if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) { + crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_ELECTION_COUNT)) { + crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); + } + if (pcmk_is_set(action, A_ELECTION_VOTE)) { + crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); + } + if (pcmk_is_set(action, A_ELECTION_CHECK)) { + crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text); + } + if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) { + crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) 
%s", A_CL_JOIN_ANNOUNCE, text); + } + if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) { + crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); + } + if (pcmk_is_set(action, A_CL_JOIN_RESULT)) { + crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); + } + if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) { + crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); + } + if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) { + crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); + } + if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) { + crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); + } + if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) { + crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); + } + if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) { + crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); + } + if (pcmk_is_set(action, A_MSG_PROCESS)) { + crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); + } + if (pcmk_is_set(action, A_MSG_ROUTE)) { + crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); + } + if (pcmk_is_set(action, A_RECOVER)) { + crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text); + } + if (pcmk_is_set(action, A_DC_RELEASE)) { + crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); + } + if (pcmk_is_set(action, A_DC_RELEASED)) { + crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); + } + if (pcmk_is_set(action, A_DC_TAKEOVER)) { + crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); + } + if (pcmk_is_set(action, A_SHUTDOWN)) { + crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); + } + if (pcmk_is_set(action, A_SHUTDOWN_REQ)) { + crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); + } + if (pcmk_is_set(action, A_STOP)) { + crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text); + } + if (pcmk_is_set(action, A_EXIT_0)) { + crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); + } + if (pcmk_is_set(action, A_EXIT_1)) { + crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); + } + if (pcmk_is_set(action, A_CIB_START)) { + crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text); + } + if (pcmk_is_set(action, A_CIB_STOP)) { + crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); + } + if (pcmk_is_set(action, A_TE_INVOKE)) { + crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); + } + if (pcmk_is_set(action, A_TE_START)) { + crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text); + } + if (pcmk_is_set(action, A_TE_STOP)) { + crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); + } + if (pcmk_is_set(action, A_TE_CANCEL)) { + crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); + } + if (pcmk_is_set(action, A_PE_INVOKE)) { + crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); + } + if (pcmk_is_set(action, A_PE_START)) { + crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text); + } + if (pcmk_is_set(action, A_PE_STOP)) { + crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); + } + if (pcmk_is_set(action, A_NODE_BLOCK)) { + crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); + } + if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) { + crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); + } + if (pcmk_is_set(action, A_LOG)) { + crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, 
text); + } + if (pcmk_is_set(action, A_ERROR)) { + crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text); + } + if (pcmk_is_set(action, A_WARN)) { + crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text); + } +} + +gboolean +update_dc(xmlNode * msg) +{ + char *last_dc = controld_globals.dc_name; + const char *dc_version = NULL; + const char *welcome_from = NULL; + + if (msg != NULL) { + gboolean invalid = FALSE; + + dc_version = crm_element_value(msg, F_CRM_VERSION); + welcome_from = crm_element_value(msg, F_CRM_HOST_FROM); + + CRM_CHECK(dc_version != NULL, return FALSE); + CRM_CHECK(welcome_from != NULL, return FALSE); + + if (AM_I_DC + && !pcmk__str_eq(welcome_from, controld_globals.our_nodename, + pcmk__str_casei)) { + invalid = TRUE; + + } else if ((controld_globals.dc_name != NULL) + && !pcmk__str_eq(welcome_from, controld_globals.dc_name, + pcmk__str_casei)) { + invalid = TRUE; + } + + if (invalid) { + if (AM_I_DC) { + crm_err("Not updating DC to %s (%s): we are also a DC", + welcome_from, dc_version); + } else { + crm_warn("New DC %s is not %s", + welcome_from, controld_globals.dc_name); + } + + controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START); + controld_trigger_fsa(); + return FALSE; + } + } + + controld_globals.dc_name = NULL; // freed as last_dc + pcmk__str_update(&(controld_globals.dc_name), welcome_from); + pcmk__str_update(&(controld_globals.dc_version), dc_version); + + if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) { + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { + crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, + pcmk__s(controld_globals.dc_version, "unknown version")); + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER); + + } else if (last_dc != NULL) { + crm_info("Unset DC (was %s)", last_dc); + } + + free(last_dc); + return TRUE; +} + +void crmd_peer_down(crm_node_t *peer, bool full) +{ + if(full && peer->state == NULL) { + pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0); + crm_update_peer_proc(__func__, peer, crm_proc_none, NULL); + } + crm_update_peer_join(__func__, peer, crm_join_none); + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); +} + +/*! + * \internal + * \brief Check feature set compatibility of DC and joining node + * + * Return true if a joining node's CRM feature set is compatible with the + * current DC's. The feature sets are compatible if they have the same major + * version number, and the DC's minor version number is the same or older than + * the joining node's. The minor-minor version is intended solely to allow + * resource agents to detect feature support, and so is ignored. 
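+ * + * For example, under these rules a DC at feature set 3.15.1 accepts a joiner + * at 3.15.0 or 3.16.0, but not one at 3.14.2 (older minor) or 2.15.1 + * (different major).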
+ * + * \param[in] dc_version DC's feature set + * \param[in] join_version Joining node's version + */ +bool +feature_set_compatible(const char *dc_version, const char *join_version) +{ + char *dc_minor = NULL; + char *join_minor = NULL; + long dc_v = 0; + long join_v = 0; + + // Get DC's major version + errno = 0; + dc_v = strtol(dc_version, &dc_minor, 10); + if (errno) { + return FALSE; + } + + // Get joining node's major version + errno = 0; + join_v = strtol(join_version, &join_minor, 10); + if (errno) { + return FALSE; + } + + // Major version component must be identical + if (dc_v != join_v) { + return FALSE; + } + + // Get DC's minor version + if (*dc_minor == '.') { + ++dc_minor; + } + errno = 0; + dc_v = strtol(dc_minor, NULL, 10); + if (errno) { + return FALSE; + } + + // Get joining node's minor version + if (*join_minor == '.') { + ++join_minor; + } + errno = 0; + join_v = strtol(join_minor, NULL, 10); + if (errno) { + return FALSE; + } + + // DC's minor version must be the same or older + return dc_v <= join_v; +} + +const char * +get_node_id(xmlNode *lrm_rsc_op) +{ + xmlNode *node = lrm_rsc_op; + + while (node != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(node), pcmk__str_casei)) { + node = node->parent; + } + + CRM_CHECK(node != NULL, return NULL); + return ID(node); +} diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h new file mode 100644 index 0000000..6ce413d --- /dev/null +++ b/daemons/controld/controld_utils.h @@ -0,0 +1,61 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CRMD_UTILS__H +# define CRMD_UTILS__H + +# include <crm/crm.h> +# include <crm/common/xml.h> + +# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + +enum node_update_flags { + node_update_none = 0x0000, + node_update_quick = 0x0001, + node_update_cluster = 0x0010, + node_update_peer = 0x0020, + node_update_join = 0x0040, + node_update_expected = 0x0100, + node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected, +}; + +crm_exit_t crmd_exit(crm_exit_t exit_code); +_Noreturn void crmd_fast_exit(crm_exit_t exit_code); +void controld_shutdown_schedulerd_ipc(void); +void controld_stop_sched_timer(void); +void controld_free_sched_timer(void); +void controld_expect_sched_reply(char *ref); + +void fsa_dump_actions(uint64_t action, const char *text); +void fsa_dump_inputs(int log_level, const char *text, long long input_register); + +gboolean update_dc(xmlNode * msg); +void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); +xmlNode *create_node_state_update(crm_node_t *node, int flags, + xmlNode *parent, const char *source); +void populate_cib_nodes(enum node_update_flags flags, const char *source); +void crm_update_quorum(gboolean quorum, gboolean force_update); +void controld_close_attrd_ipc(void); +void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); +void update_attrd_list(GList *attrs, uint32_t opts); +void update_attrd_remote_node_removed(const char *host, const char *user_name); +void update_attrd_clear_failures(const char *host, const char *rsc, + const char *op, const char *interval_spec, + gboolean is_remote_node); + +int crmd_join_phase_count(enum crm_join_phase phase); +void crmd_join_phase_log(int level); + +void crmd_peer_down(crm_node_t *peer, bool full); + +bool feature_set_compatible(const char *dc_version, const char *join_version); + +const char *get_node_id(xmlNode *lrm_rsc_op); + +#endif diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c new file mode 100644 index 0000000..5858898 --- /dev/null +++ b/daemons/controld/pacemaker-controld.c @@ -0,0 +1,205 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> + +#include <crm/crm.h> +#include <crm/common/cmdline_internal.h> +#include <crm/common/ipc.h> +#include <crm/common/output_internal.h> +#include <crm/common/xml.h> + +#include <pacemaker-controld.h> + +#define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \ + "to events" + +_Noreturn void crmd_init(void); +extern void init_dotfile(void); + +controld_globals_t controld_globals = { + // Automatic initialization to 0, false, or NULL is fine for most members + .fsa_state = S_STARTING, + .fsa_actions = A_NOTHING, +}; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) +{ + return pcmk__build_arg_context(args, "text (default), xml", group, + "[metadata]"); +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_exit_t exit_code = CRM_EX_OK; + bool initialize = true; + + crm_ipc_t *old_instance = NULL; + + pcmk__output_t *out = NULL; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + + crm_log_preinit(NULL, argc, argv); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + initialize = false; + goto done; + } + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { + crmd_metadata(); + initialize = false; + goto done; + } + + pcmk__cli_init_logging("pacemaker-controld", args->verbosity); + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker controller"); + + old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); + if (old_instance == NULL) { + /* crm_ipc_new will have already printed an error message with crm_err. 
*/ + exit_code = CRM_EX_FATAL; + goto done; + } + + if (crm_ipc_connect(old_instance)) { + /* IPC end-point already up */ + crm_ipc_close(old_instance); + crm_ipc_destroy(old_instance); + crm_err("pacemaker-controld is already active, aborting startup"); + initialize = false; + goto done; + + } else { + /* not up or not authentic, we'll proceed either way */ + crm_ipc_destroy(old_instance); + old_instance = NULL; + } + + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + exit_code = CRM_EX_FATAL; + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Bad permissions on " PE_STATE_DIR + " (see logs for details)"); + goto done; + + } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { + exit_code = CRM_EX_FATAL; + crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Bad permissions on " CRM_CONFIG_DIR + " (see logs for details)"); + goto done; + } + + if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) { + exit_code = CRM_EX_FATAL; + goto done; + } + + pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + + if ((exit_code == CRM_EX_OK) && initialize) { + // Does not return + crmd_init(); + } + crm_exit(exit_code); +} + +void +crmd_init(void) +{ + crm_exit_t exit_code = CRM_EX_OK; + enum crmd_fsa_state state; + + init_dotfile(); + register_fsa_input(C_STARTUP, I_STARTUP, NULL); + + crm_peer_init(); + state = s_crmd_fsa(C_STARTUP); + + if (state == S_PENDING || state == S_STARTING) { + /* Create the mainloop and run it... */ + crm_trace("Starting %s's mainloop", crm_system_name); + controld_globals.mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(controld_globals.mainloop); + if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { + crm_info("Inhibiting automated respawn"); + exit_code = CRM_EX_FATAL; + } + + } else { + crm_err("Startup of %s failed. Current state: %s", + crm_system_name, fsa_state2string(state)); + exit_code = CRM_EX_ERROR; + } + + crm_info("%s[%lu] exiting with status %d (%s)", + crm_system_name, (unsigned long) getpid(), exit_code, + crm_exit_str(exit_code)); + + crmd_fast_exit(exit_code); +} diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h new file mode 100644 index 0000000..1484a00 --- /dev/null +++ b/daemons/controld/pacemaker-controld.h @@ -0,0 +1,39 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CRMD__H +# define CRMD__H + +#include <controld_alerts.h> +#include <controld_callbacks.h> +#include <controld_cib.h> +#include <controld_fencing.h> +#include <controld_fsa.h> +#include <controld_globals.h> +#include <controld_timers.h> +#include <controld_lrm.h> +#include <controld_membership.h> +#include <controld_messages.h> +#include <controld_metadata.h> +#include <controld_throttle.h> +#include <controld_transition.h> +#include <controld_utils.h> + +# define controld_trigger_config() \ + controld_trigger_config_as(__func__, __LINE__) + +void crmd_metadata(void); +void controld_trigger_config_as(const char *fn, int line); +void controld_election_init(const char *uname); +void controld_configure_election(GHashTable *options); +void controld_remove_voter(const char *uname); +void controld_election_fini(void); +void controld_stop_current_election_timeout(void); + +#endif
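
For reference, the load-based job throttling added in controld_throttle.c above can be checked in isolation. The standalone sketch below (illustrative only; the enum and helper names are hypothetical and not part of this diff) reproduces the per-node job-limit policy of throttle_get_job_limit(): extreme or high load allows a single job, medium load a quarter of the node's configured maximum, low load half, and no load the full maximum, never dropping below one job.

    #include <stdio.h>

    /* Hypothetical stand-ins for the daemon's throttle modes */
    enum mode { MODE_NONE, MODE_LOW, MODE_MED, MODE_HIGH, MODE_EXTREME };

    #define MAX2(a, b) ((a) > (b)? (a) : (b))

    /* Mirror of the policy in throttle_get_job_limit() */
    static int
    job_limit(enum mode m, int max)
    {
        switch (m) {
            case MODE_EXTREME:
            case MODE_HIGH:
                return 1;                /* at least one job is always allowed */
            case MODE_MED:
                return MAX2(1, max / 4);
            case MODE_LOW:
                return MAX2(1, max / 2);
            default:                     /* MODE_NONE */
                return MAX2(1, max);
        }
    }

    int
    main(void)
    {
        /* With a per-node maximum of 8 jobs, this prints: 1 2 4 8 */
        printf("%d %d %d %d\n",
               job_limit(MODE_HIGH, 8), job_limit(MODE_MED, 8),
               job_limit(MODE_LOW, 8), job_limit(MODE_NONE, 8));
        return 0;
    }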