From e5a812082ae033afb1eed82c0f2df3d0f6bdc93f Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 08:53:20 +0200
Subject: Adding upstream version 2.1.6.

Signed-off-by: Daniel Baumann
---
 daemons/Makefile.am                              |   11 +
 daemons/attrd/Makefile.am                        |   48 +
 daemons/attrd/attrd_alerts.c                     |  145 +
 daemons/attrd/attrd_attributes.c                 |  188 ++
 daemons/attrd/attrd_cib.c                        |  380 +++
 daemons/attrd/attrd_corosync.c                   |  620 ++++
 daemons/attrd/attrd_elections.c                  |  179 ++
 daemons/attrd/attrd_ipc.c                        |  628 ++++
 daemons/attrd/attrd_messages.c                   |  328 ++
 daemons/attrd/attrd_sync.c                       |  577 ++++
 daemons/attrd/attrd_utils.c                      |  362 +++
 daemons/attrd/pacemaker-attrd.c                  |  358 +++
 daemons/attrd/pacemaker-attrd.h                  |  216 ++
 daemons/based/Makefile.am                        |   47 +
 daemons/based/based_callbacks.c                  | 1696 ++++++++++
 daemons/based/based_common.c                     |  352 +++
 daemons/based/based_io.c                         |  473 +++
 daemons/based/based_messages.c                   |  427 +++
 daemons/based/based_notify.c                     |  305 ++
 daemons/based/based_remote.c                     |  680 ++++
 daemons/based/cib.pam                            |    6 +
 daemons/based/pacemaker-based.c                  |  442 +++
 daemons/based/pacemaker-based.h                  |  150 +
 daemons/controld/Makefile.am                     |   87 +
 daemons/controld/controld_alerts.c               |   88 +
 daemons/controld/controld_alerts.h               |   22 +
 daemons/controld/controld_attrd.c                |  160 +
 daemons/controld/controld_callbacks.c            |  367 +++
 daemons/controld/controld_callbacks.h            |   21 +
 daemons/controld/controld_cib.c                  | 1138 +++++++
 daemons/controld/controld_cib.h                  |  125 +
 daemons/controld/controld_control.c              |  857 +++++
 daemons/controld/controld_corosync.c             |  164 +
 daemons/controld/controld_election.c             |  292 ++
 daemons/controld/controld_execd.c                | 2433 ++++++++++++++
 daemons/controld/controld_execd_state.c          |  814 +++++
 daemons/controld/controld_fencing.c              | 1108 +++++++
 daemons/controld/controld_fencing.h              |   38 +
 daemons/controld/controld_fsa.c                  |  741 +++++
 daemons/controld/controld_fsa.h                  |  694 ++++
 daemons/controld/controld_globals.h              |  143 +
 daemons/controld/controld_join_client.c          |  366 +++
 daemons/controld/controld_join_dc.c              |  987 ++++++
 daemons/controld/controld_lrm.h                  |  188 ++
 daemons/controld/controld_matrix.c               | 1250 ++++++++
 daemons/controld/controld_membership.c           |  457 +++
 daemons/controld/controld_membership.h           |   29 +
 daemons/controld/controld_messages.c             | 1307 ++++++++
 daemons/controld/controld_messages.h             |   86 +
 daemons/controld/controld_metadata.c             |  320 ++
 daemons/controld/controld_metadata.h             |   96 +
 daemons/controld/controld_remote_ra.c            | 1440 +++++++++
 daemons/controld/controld_schedulerd.c           |  506 +++
 daemons/controld/controld_te_actions.c           |  746 +++++
 daemons/controld/controld_te_callbacks.c         |  689 ++++
 daemons/controld/controld_te_events.c            |  601 ++++
 daemons/controld/controld_te_utils.c             |  367 +++
 daemons/controld/controld_throttle.c             |  574 ++++
 daemons/controld/controld_throttle.h             |   16 +
 daemons/controld/controld_timers.c               |  509 +++
 daemons/controld/controld_timers.h               |   36 +
 daemons/controld/controld_transition.c           |  197 ++
 daemons/controld/controld_transition.h           |   63 +
 daemons/controld/controld_utils.c                |  837 +++++
 daemons/controld/controld_utils.h                |   61 +
 daemons/controld/pacemaker-controld.c            |  205 ++
 daemons/controld/pacemaker-controld.h            |   39 +
 daemons/execd/Makefile.am                        |   76 +
 daemons/execd/cts-exec-helper.c                  |  624 ++++
 daemons/execd/execd_alerts.c                     |  205 ++
 daemons/execd/execd_commands.c                   | 1927 ++++++++++++
 daemons/execd/pacemaker-execd.c                  |  582 ++++
 daemons/execd/pacemaker-execd.h                  |  110 +
 daemons/execd/pacemaker-remoted.8.inc            |    5 +
 daemons/execd/pacemaker_remote.in                |  176 ++
 daemons/execd/pacemaker_remote.service.in        |   52 +
 daemons/execd/remoted_pidone.c                   |  298 ++
 daemons/execd/remoted_proxy.c                    |  470 +++
 daemons/execd/remoted_tls.c                      |  428 +++
 daemons/fenced/Makefile.am                       |   52 +
 daemons/fenced/cts-fence-helper.c                |  681 ++++
 daemons/fenced/fenced_commands.c                 | 3674 ++++++++++++++++++++++
 daemons/fenced/fenced_history.c                  |  548 ++++
 daemons/fenced/fenced_remote.c                   | 2509 +++++++++++++++
 daemons/fenced/pacemaker-fenced.c                | 1751 +++++++++++
 daemons/fenced/pacemaker-fenced.h                |  315 ++
 daemons/pacemakerd/Makefile.am                   |   37 +
 daemons/pacemakerd/pacemaker.combined.upstart.in |   67 +
 daemons/pacemakerd/pacemaker.service.in          |  103 +
 daemons/pacemakerd/pacemaker.upstart.in          |   33 +
 daemons/pacemakerd/pacemakerd.8.inc              |    5 +
 daemons/pacemakerd/pacemakerd.c                  |  483 +++
 daemons/pacemakerd/pacemakerd.h                  |   35 +
 daemons/pacemakerd/pcmkd_corosync.c              |  371 +++
 daemons/pacemakerd/pcmkd_messages.c              |  278 ++
 daemons/pacemakerd/pcmkd_subdaemons.c            |  888 ++++++
 daemons/schedulerd/Makefile.am                   |   53 +
 daemons/schedulerd/pacemaker-schedulerd.c        |  181 ++
 daemons/schedulerd/pacemaker-schedulerd.h        |   20 +
 daemons/schedulerd/schedulerd_messages.c         |  335 ++
 100 files changed, 47254 insertions(+)
 create mode 100644 daemons/Makefile.am
 create mode 100644 daemons/attrd/Makefile.am
 create mode 100644 daemons/attrd/attrd_alerts.c
 create mode 100644 daemons/attrd/attrd_attributes.c
 create mode 100644 daemons/attrd/attrd_cib.c
 create mode 100644 daemons/attrd/attrd_corosync.c
 create mode 100644 daemons/attrd/attrd_elections.c
 create mode 100644 daemons/attrd/attrd_ipc.c
 create mode 100644 daemons/attrd/attrd_messages.c
 create mode 100644 daemons/attrd/attrd_sync.c
 create mode 100644 daemons/attrd/attrd_utils.c
 create mode 100644 daemons/attrd/pacemaker-attrd.c
 create mode 100644 daemons/attrd/pacemaker-attrd.h
 create mode 100644 daemons/based/Makefile.am
 create mode 100644 daemons/based/based_callbacks.c
 create mode 100644 daemons/based/based_common.c
 create mode 100644 daemons/based/based_io.c
 create mode 100644 daemons/based/based_messages.c
 create mode 100644 daemons/based/based_notify.c
 create mode 100644 daemons/based/based_remote.c
 create mode 100644 daemons/based/cib.pam
 create mode 100644 daemons/based/pacemaker-based.c
 create mode 100644 daemons/based/pacemaker-based.h
 create mode 100644 daemons/controld/Makefile.am
 create mode 100644 daemons/controld/controld_alerts.c
 create mode 100644 daemons/controld/controld_alerts.h
 create mode 100644 daemons/controld/controld_attrd.c
 create mode 100644 daemons/controld/controld_callbacks.c
 create mode 100644 daemons/controld/controld_callbacks.h
 create mode 100644 daemons/controld/controld_cib.c
 create mode 100644 daemons/controld/controld_cib.h
 create mode 100644 daemons/controld/controld_control.c
 create mode 100644 daemons/controld/controld_corosync.c
 create mode 100644 daemons/controld/controld_election.c
 create mode 100644 daemons/controld/controld_execd.c
 create mode 100644 daemons/controld/controld_execd_state.c
 create mode 100644 daemons/controld/controld_fencing.c
 create mode 100644 daemons/controld/controld_fencing.h
 create mode 100644 daemons/controld/controld_fsa.c
 create mode 100644 daemons/controld/controld_fsa.h
 create mode 100644 daemons/controld/controld_globals.h
 create mode 100644 daemons/controld/controld_join_client.c
 create mode 100644 daemons/controld/controld_join_dc.c
 create mode 100644 daemons/controld/controld_lrm.h
 create mode 100644 daemons/controld/controld_matrix.c
 create mode 100644 daemons/controld/controld_membership.c
 create mode 100644 daemons/controld/controld_membership.h
 create mode 100644 daemons/controld/controld_messages.c
 create mode 100644 daemons/controld/controld_messages.h
 create mode 100644 daemons/controld/controld_metadata.c
 create mode 100644 daemons/controld/controld_metadata.h
 create mode 100644 daemons/controld/controld_remote_ra.c
 create mode 100644 daemons/controld/controld_schedulerd.c
 create mode 100644 daemons/controld/controld_te_actions.c
 create mode 100644 daemons/controld/controld_te_callbacks.c
 create mode 100644 daemons/controld/controld_te_events.c
 create mode 100644 daemons/controld/controld_te_utils.c
 create mode 100644 daemons/controld/controld_throttle.c
 create mode 100644 daemons/controld/controld_throttle.h
 create mode 100644 daemons/controld/controld_timers.c
 create mode 100644 daemons/controld/controld_timers.h
 create mode 100644 daemons/controld/controld_transition.c
 create mode 100644 daemons/controld/controld_transition.h
 create mode 100644 daemons/controld/controld_utils.c
 create mode 100644 daemons/controld/controld_utils.h
 create mode 100644 daemons/controld/pacemaker-controld.c
 create mode 100644 daemons/controld/pacemaker-controld.h
 create mode 100644 daemons/execd/Makefile.am
 create mode 100644 daemons/execd/cts-exec-helper.c
 create mode 100644 daemons/execd/execd_alerts.c
 create mode 100644 daemons/execd/execd_commands.c
 create mode 100644 daemons/execd/pacemaker-execd.c
 create mode 100644 daemons/execd/pacemaker-execd.h
 create mode 100644 daemons/execd/pacemaker-remoted.8.inc
 create mode 100644 daemons/execd/pacemaker_remote.in
 create mode 100644 daemons/execd/pacemaker_remote.service.in
 create mode 100644 daemons/execd/remoted_pidone.c
 create mode 100644 daemons/execd/remoted_proxy.c
 create mode 100644 daemons/execd/remoted_tls.c
 create mode 100644 daemons/fenced/Makefile.am
 create mode 100644 daemons/fenced/cts-fence-helper.c
 create mode 100644 daemons/fenced/fenced_commands.c
 create mode 100644 daemons/fenced/fenced_history.c
 create mode 100644 daemons/fenced/fenced_remote.c
 create mode 100644 daemons/fenced/pacemaker-fenced.c
 create mode 100644 daemons/fenced/pacemaker-fenced.h
 create mode 100644 daemons/pacemakerd/Makefile.am
 create mode 100644 daemons/pacemakerd/pacemaker.combined.upstart.in
 create mode 100644 daemons/pacemakerd/pacemaker.service.in
 create mode 100644 daemons/pacemakerd/pacemaker.upstart.in
 create mode 100644 daemons/pacemakerd/pacemakerd.8.inc
 create mode 100644 daemons/pacemakerd/pacemakerd.c
 create mode 100644 daemons/pacemakerd/pacemakerd.h
 create mode 100644 daemons/pacemakerd/pcmkd_corosync.c
 create mode 100644 daemons/pacemakerd/pcmkd_messages.c
 create mode 100644 daemons/pacemakerd/pcmkd_subdaemons.c
 create mode 100644 daemons/schedulerd/Makefile.am
 create mode 100644 daemons/schedulerd/pacemaker-schedulerd.c
 create mode 100644 daemons/schedulerd/pacemaker-schedulerd.h
 create mode 100644 daemons/schedulerd/schedulerd_messages.c

diff --git a/daemons/Makefile.am b/daemons/Makefile.am
new file mode 100644
index 0000000..743320b
--- /dev/null
+++ b/daemons/Makefile.am
@@ -0,0 +1,11 @@
+#
+# Copyright 2018 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+MAINTAINERCLEANFILES = Makefile.in
+SUBDIRS = based schedulerd attrd controld execd fenced pacemakerd
diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am
new file mode 100644
index 0000000..6bb81c4
--- /dev/null
+++ b/daemons/attrd/Makefile.am
@@ -0,0 +1,48 @@
+#
+# Copyright 2004-2022 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-attrd
+
+noinst_HEADERS = pacemaker-attrd.h
+
+pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \
+			$(top_builddir)/lib/pengine/libpe_rules.la \
+			$(top_builddir)/lib/common/libcrmcommon.la \
+			$(top_builddir)/lib/cib/libcib.la \
+			$(top_builddir)/lib/lrmd/liblrmd.la \
+			$(CLUSTERLIBS)
+
+pacemaker_attrd_SOURCES = attrd_alerts.c \
+			  attrd_attributes.c \
+			  attrd_cib.c \
+			  attrd_corosync.c \
+			  attrd_elections.c \
+			  attrd_ipc.c \
+			  attrd_messages.c \
+			  attrd_sync.c \
+			  attrd_utils.c \
+			  pacemaker-attrd.c
+
+clean-generic:
+	rm -f *.log *.debug *.xml *~
+
+if BUILD_LEGACY_LINKS
+install-exec-hook:
+	cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd && $(LN_S) pacemaker-attrd attrd
+
+uninstall-hook:
+	cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd
+endif
diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c
new file mode 100644
index 0000000..b694891
--- /dev/null
+++ b/daemons/attrd/attrd_alerts.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2015-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "pacemaker-attrd.h"
+
+static GList *attrd_alert_list = NULL;
+
+static void
+attrd_lrmd_callback(lrmd_event_data_t * op)
+{
+    CRM_CHECK(op != NULL, return);
+    switch (op->type) {
+        case lrmd_event_disconnect:
+            crm_info("Lost connection to executor");
+            attrd_lrmd_disconnect();
+            break;
+        default:
+            break;
+    }
+}
+
+static lrmd_t *
+attrd_lrmd_connect(void)
+{
+    if (the_lrmd == NULL) {
+        the_lrmd = lrmd_api_new();
+        the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback);
+    }
+
+    if (!the_lrmd->cmds->is_connected(the_lrmd)) {
+        const unsigned int max_attempts = 10;
+        int ret = -ENOTCONN;
+
+        for (int fails = 0; fails < max_attempts; ++fails) {
+            ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL);
+            if (ret == pcmk_ok) {
+                break;
+            }
+
+            crm_debug("Could not connect to executor, %d tries remaining",
+                      (max_attempts - fails));
+            /* @TODO We don't want to block here with sleep, but we should wait
+             * some time between connection attempts. We could possibly add a
+             * timer with a callback, but then we'd likely need an alert queue.
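+             *
+             * Editor's note: a minimal sketch of that idea, assuming a
+             * hypothetical lrmd_reconnect_cb() and reusing the repeating
+             * mainloop timer API that attrd_add_timer() wraps elsewhere in
+             * this patch; this is not upstream code:
+             *
+             *   static gboolean
+             *   lrmd_reconnect_cb(gpointer unused)
+             *   {
+             *       // Keep the repeating timer alive until a connection
+             *       // attempt finally succeeds
+             *       return the_lrmd->cmds->connect(the_lrmd, T_ATTRD,
+             *                                      NULL) != pcmk_ok;
+             *   }
+             *
+             *   // Retry every 2s instead of blocking in a sleep() loop
+             *   mainloop_timer_start(mainloop_timer_add("lrmd-reconnect",
+             *                        2000, TRUE, lrmd_reconnect_cb, NULL));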
+ */ + } + + if (ret != pcmk_ok) { + attrd_lrmd_disconnect(); + } + } + + return the_lrmd; +} + +void +attrd_lrmd_disconnect(void) { + if (the_lrmd) { + lrmd_t *conn = the_lrmd; + + the_lrmd = NULL; /* in case we're called recursively */ + lrmd_api_delete(conn); /* will disconnect if necessary */ + } +} + +static void +config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + xmlNode *crmalerts = NULL; + + if (rc == -ENXIO) { + crm_debug("Local CIB has no alerts section"); + return; + } else if (rc != pcmk_ok) { + crm_notice("Could not query local CIB: %s", pcmk_strerror(rc)); + return; + } + + crmalerts = output; + if (crmalerts && !pcmk__str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, pcmk__str_none)) { + crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); + } + if (!crmalerts) { + crm_notice("CIB query result has no " XML_CIB_TAG_ALERTS " section"); + return; + } + + pe_free_alert_list(attrd_alert_list); + attrd_alert_list = pe_unpack_alerts(crmalerts); +} + +#define XPATH_ALERTS \ + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS + +gboolean +attrd_read_options(gpointer user_data) +{ + int call_id; + + CRM_CHECK(the_cib != NULL, return TRUE); + + call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL, + cib_xpath | cib_scope_local); + + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, + "config_query_callback", + config_query_callback, free); + + crm_trace("Querying the CIB... call %d", call_id); + return TRUE; +} + +void +attrd_cib_updated_cb(const char *event, xmlNode * msg) +{ + if (!attrd_shutting_down() && pcmk__alert_in_patchset(msg, false)) { + mainloop_set_trigger(attrd_config_read); + } +} + +int +attrd_send_attribute_alert(const char *node, int nodeid, + const char *attr, const char *value) +{ + if (attrd_alert_list == NULL) { + return pcmk_ok; + } + return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list, + node, nodeid, attr, value); +} diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c new file mode 100644 index 0000000..516ced7 --- /dev/null +++ b/daemons/attrd/attrd_attributes.c @@ -0,0 +1,188 @@ +/* + * Copyright 2013-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +static attribute_t * +attrd_create_attribute(xmlNode *xml) +{ + int dampen = 0; + const char *value = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); + attribute_t *a = calloc(1, sizeof(attribute_t)); + + CRM_ASSERT(a != NULL); + + a->id = crm_element_value_copy(xml, PCMK__XA_ATTR_NAME); + a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET); + a->set_type = crm_element_value_copy(xml, PCMK__XA_ATTR_SET_TYPE); + a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID); + a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); + + crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &a->is_private); + + a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER); + crm_trace("Performing all %s operations as user '%s'", a->id, a->user); + + if (value != NULL) { + dampen = crm_get_msec(value); + } + crm_trace("Created attribute %s with %s write delay", a->id, + (a->timeout_ms == 0)? 
"no" : pcmk__readable_interval(a->timeout_ms)); + + if(dampen > 0) { + a->timeout_ms = dampen; + a->timer = attrd_add_timer(a->id, a->timeout_ms, a); + } else if (dampen < 0) { + crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); + } + + g_hash_table_replace(attributes, a->id, a); + return a; +} + +static int +attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr) +{ + const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); + int dampen = 0; + + if (dvalue == NULL) { + crm_warn("Could not update %s: peer did not specify value for delay", + attr); + return EINVAL; + } + + dampen = crm_get_msec(dvalue); + if (dampen < 0) { + crm_warn("Could not update %s: invalid delay value %dms (%s)", + attr, dampen, dvalue); + return EINVAL; + } + + if (a->timeout_ms != dampen) { + mainloop_timer_del(a->timer); + a->timeout_ms = dampen; + if (dampen > 0) { + a->timer = attrd_add_timer(attr, a->timeout_ms, a); + crm_info("Update attribute %s delay to %dms (%s)", + attr, dampen, dvalue); + } else { + a->timer = NULL; + crm_info("Update attribute %s to remove delay", attr); + } + + /* If dampening changed, do an immediate write-out, + * otherwise repeated dampening changes would prevent write-outs + */ + attrd_write_or_elect_attribute(a); + } + + return pcmk_rc_ok; +} + +GHashTable *attributes = NULL; + +/*! + * \internal + * \brief Create an XML representation of an attribute for use in peer messages + * + * \param[in,out] parent Create attribute XML as child element of this + * \param[in] a Attribute to represent + * \param[in] v Attribute value to represent + * \param[in] force_write If true, value should be written even if unchanged + * + * \return XML representation of attribute + */ +xmlNode * +attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write) +{ + xmlNode *xml = create_xml_node(parent, __func__); + + crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); + crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); + crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); + crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); + pcmk__xe_add_node(xml, v->nodename, v->nodeid); + if (v->is_remote != 0) { + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); + } + crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); + crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000); + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private); + crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write); + + return xml; +} + +void +attrd_clear_value_seen(void) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a; + attribute_value_t *v = NULL; + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + v->seen = FALSE; + crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); + } + } +} + +attribute_t * +attrd_populate_attribute(xmlNode *xml, const char *attr) +{ + attribute_t *a = NULL; + bool update_both = false; + + const char *op = crm_element_value(xml, PCMK__XA_TASK); + + // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK__XA_TASK + update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH, + pcmk__str_null_matches); + + // Look up or create attribute entry + a = g_hash_table_lookup(attributes, attr); + if (a == NULL) { + if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) { + a = 
attrd_create_attribute(xml); + } else { + crm_warn("Could not update %s: attribute not found", attr); + return NULL; + } + } + + // Update attribute dampening + if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { + int rc = attrd_update_dampening(a, xml, attr); + + if (rc != pcmk_rc_ok || !update_both) { + return NULL; + } + } + + return a; +} diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c new file mode 100644 index 0000000..928c013 --- /dev/null +++ b/daemons/attrd/attrd_cib.c @@ -0,0 +1,380 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +static int last_cib_op_done = 0; + +static gboolean +attribute_timer_cb(gpointer data) +{ + attribute_t *a = data; + crm_trace("Dampen interval expired for %s", a->id); + attrd_write_or_elect_attribute(a); + return FALSE; +} + +static void +attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) +{ + int level = LOG_ERR; + GHashTableIter iter; + const char *peer = NULL; + attribute_value_t *v = NULL; + + char *name = user_data; + attribute_t *a = g_hash_table_lookup(attributes, name); + + if(a == NULL) { + crm_info("Attribute %s no longer exists", name); + return; + } + + a->update = 0; + if (rc == pcmk_ok && call_id < 0) { + rc = call_id; + } + + switch (rc) { + case pcmk_ok: + level = LOG_INFO; + last_cib_op_done = call_id; + if (a->timer && !a->timeout_ms) { + // Remove temporary dampening for failed writes + mainloop_timer_del(a->timer); + a->timer = NULL; + } + break; + + case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ + case -ETIME: /* When an attr changes while there is a DC election */ + case -ENXIO: /* When an attr changes while the CIB is syncing a + * newer config from a node that just came up + */ + level = LOG_WARNING; + break; + } + + do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d", + call_id, a->id, pcmk_strerror(rc), rc); + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { + do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); + free(v->requested); + v->requested = NULL; + if (rc != pcmk_ok) { + a->changed = true; /* Attempt write out again */ + } + } + + if (a->changed && attrd_election_won()) { + if (rc == pcmk_ok) { + /* We deferred a write of a new update because this update was in + * progress. Write out the new value without additional delay. + */ + attrd_write_attribute(a, false); + + /* We're re-attempting a write because the original failed; delay + * the next attempt so we don't potentially flood the CIB manager + * and logs with a zillion attempts per second. + * + * @TODO We could elect a new writer instead. However, we'd have to + * somehow downgrade our vote, and we'd still need something like this + * if all peers similarly fail to write this attribute (which may + * indicate a corrupted attribute entry rather than a CIB issue). 
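 * 
 * Editor's note: in short, both failure branches below back off before the 
 * next attempt -- an attribute with its own dampening interval restarts that 
 * timer, while one without gets the temporary 2-second timer described 
 * below, which lives until the write succeeds or a real dampening value is 
 * configured. 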
+ */ + } else if (a->timer) { + // Attribute has a dampening value, so use that as delay + if (!mainloop_timer_running(a->timer)) { + crm_trace("Delayed re-attempted write for %s by %s", + name, pcmk__readable_interval(a->timeout_ms)); + mainloop_timer_start(a->timer); + } + } else { + /* Set a temporary dampening of 2 seconds (timer will continue + * to exist until the attribute's dampening gets set or the + * write succeeds). + */ + a->timer = attrd_add_timer(a->id, 2000, a); + mainloop_timer_start(a->timer); + } + } +} + +static void +build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) +{ + const char *set = NULL; + xmlNode *xml_obj = NULL; + + xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); + crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + + xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); + crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + + if (pcmk__str_eq(a->set_type, XML_TAG_ATTR_SETS, pcmk__str_null_matches)) { + xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); + } else if (pcmk__str_eq(a->set_type, XML_TAG_UTILIZATION, pcmk__str_none)) { + xml_obj = create_xml_node(xml_obj, XML_TAG_UTILIZATION); + } else { + crm_err("Unknown set type attribute: %s", a->set_type); + } + + if (a->set_id) { + crm_xml_set_id(xml_obj, "%s", a->set_id); + } else { + crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid); + } + set = ID(xml_obj); + + xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); + if (a->uuid) { + crm_xml_set_id(xml_obj, "%s", a->uuid); + } else { + crm_xml_set_id(xml_obj, "%s-%s", set, a->id); + } + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); + + if(value) { + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); + + } else { + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); + crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); + } +} + +static void +send_alert_attributes_value(attribute_t *a, GHashTable *t) +{ + int rc = 0; + attribute_value_t *at = NULL; + GHashTableIter vIter; + + g_hash_table_iter_init(&vIter, t); + + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { + rc = attrd_send_attribute_alert(at->nodename, at->nodeid, + a->id, at->current); + crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", + a->id, at->nodename, at->current, at->nodeid, rc); + } +} + +static void +set_alert_attribute_value(GHashTable *t, attribute_value_t *v) +{ + attribute_value_t *a_v = NULL; + a_v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(a_v != NULL); + + a_v->nodeid = v->nodeid; + a_v->nodename = strdup(v->nodename); + pcmk__str_update(&a_v->current, v->current); + + g_hash_table_replace(t, a_v->nodename, a_v); +} + +mainloop_timer_t * +attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) +{ + return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); +} + +void +attrd_write_attribute(attribute_t *a, bool ignore_delay) +{ + int private_updates = 0, cib_updates = 0; + xmlNode *xml_top = NULL; + attribute_value_t *v = NULL; + GHashTableIter iter; + enum cib_call_options flags = cib_none; + GHashTable *alert_attribute_value = NULL; + + if (a == NULL) { + return; + } + + /* If this attribute will be written to the CIB ... 
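 * -- that is, if the daemon is not running in stand-alone mode and the 
 * attribute is not private (editor's note, restating the 
 * !stand_alone && !a->is_private test just below) 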
*/ + if (!stand_alone && !a->is_private) { + + /* Defer the write if now's not a good time */ + CRM_CHECK(the_cib != NULL, return); + if (a->update && (a->update < last_cib_op_done)) { + crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); + a->update = 0; // Don't log this message again + + } else if (a->update) { + crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); + return; + + } else if (mainloop_timer_running(a->timer)) { + if (ignore_delay) { + /* 'refresh' forces a write of the current value of all attributes + * Cancel any existing timers, we're writing it NOW + */ + mainloop_timer_stop(a->timer); + crm_debug("Write out of '%s': timer is running but ignore delay", a->id); + } else { + crm_info("Write out of '%s' delayed: timer is running", a->id); + return; + } + } + + /* Initialize the status update XML */ + xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); + } + + /* Attribute will be written shortly, so clear changed flag */ + a->changed = false; + + /* We will check all peers' uuids shortly, so initialize this to false */ + a->unknown_peer_uuids = false; + + /* Attribute will be written shortly, so clear forced write flag */ + a->force_write = FALSE; + + /* Make the table for the attribute trap */ + alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); + + /* Iterate over each peer value of this attribute */ + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { + crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); + + /* If the value's peer info does not correspond to a peer, ignore it */ + if (peer == NULL) { + crm_notice("Cannot update %s[%s]=%s because peer not known", + a->id, v->nodename, v->current); + continue; + } + + /* If we're just learning the peer's node id, remember it */ + if (peer->id && (v->nodeid == 0)) { + crm_trace("Learned ID %u for node %s", peer->id, v->nodename); + v->nodeid = peer->id; + } + + /* If this is a private attribute, no update needs to be sent */ + if (stand_alone || a->is_private) { + private_updates++; + continue; + } + + /* If the peer is found, but its uuid is unknown, defer write */ + if (peer->uuid == NULL) { + a->unknown_peer_uuids = true; + crm_notice("Cannot update %s[%s]=%s because peer UUID not known " + "(will retry if learned)", + a->id, v->nodename, v->current); + continue; + } + + /* Add this value to status update XML */ + crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID %u/%u)", + a->id, v->nodename, v->current, + peer->uname, peer->uuid, peer->id, v->nodeid); + build_update_element(xml_top, a, peer->uuid, v->current); + cib_updates++; + + /* Preservation of the attribute to transmit alert */ + set_alert_attribute_value(alert_attribute_value, v); + + free(v->requested); + v->requested = NULL; + if (v->current) { + v->requested = strdup(v->current); + } else { + /* Older attrd versions don't know about the cib_mixed_update + * flag so make sure it goes to the local cib which does + */ + cib__set_call_options(flags, crm_system_name, + cib_mixed_update|cib_scope_local); + } + } + + if (private_updates) { + crm_info("Processed %d private change%s for %s, id=%s, set=%s", + private_updates, pcmk__plural_s(private_updates), + a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); + } + if (cib_updates) { + crm_log_xml_trace(xml_top, __func__); + + a->update = cib_internal_op(the_cib, PCMK__CIB_REQUEST_MODIFY, NULL, + XML_CIB_TAG_STATUS, 
xml_top, NULL, flags, + a->user); + + crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", + a->update, cib_updates, pcmk__plural_s(cib_updates), + a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); + + the_cib->cmds->register_callback_full(the_cib, a->update, + CIB_OP_TIMEOUT_S, FALSE, + strdup(a->id), + "attrd_cib_callback", + attrd_cib_callback, free); + /* Transmit alert of the attribute */ + send_alert_attributes_value(a, alert_attribute_value); + } + + g_hash_table_destroy(alert_attribute_value); + free_xml(xml_top); +} + +void +attrd_write_attributes(bool all, bool ignore_delay) +{ + GHashTableIter iter; + attribute_t *a = NULL; + + crm_debug("Writing out %s attributes", all? "all" : "changed"); + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { + if (!all && a->unknown_peer_uuids) { + // Try writing this attribute again, in case peer ID was learned + a->changed = true; + } else if (a->force_write) { + /* If the force_write flag is set, write the attribute. */ + a->changed = true; + } + + if(all || a->changed) { + /* When forced write flag is set, ignore delay. */ + attrd_write_attribute(a, (a->force_write ? true : ignore_delay)); + } else { + crm_trace("Skipping unchanged attribute %s", a->id); + } + } +} + +void +attrd_write_or_elect_attribute(attribute_t *a) +{ + if (attrd_election_won()) { + attrd_write_attribute(a, false); + } else { + attrd_start_election_if_needed(); + } +} diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c new file mode 100644 index 0000000..ef205e6 --- /dev/null +++ b/daemons/attrd/attrd_corosync.c @@ -0,0 +1,620 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +extern crm_exit_t attrd_exit_status; + +static xmlNode * +attrd_confirmation(int callid) +{ + xmlNode *node = create_xml_node(NULL, __func__); + + crm_xml_add(node, F_TYPE, T_ATTRD); + crm_xml_add(node, F_ORIG, get_local_node_name()); + crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); + crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); + + return node; +} + +static void +attrd_peer_message(crm_node_t *peer, xmlNode *xml) +{ + const char *election_op = crm_element_value(xml, F_CRM_TASK); + + if (election_op) { + attrd_handle_election_op(peer, xml); + return; + } + + if (attrd_shutting_down()) { + /* If we're shutting down, we want to continue responding to election + * ops as long as we're a cluster member (because our vote may be + * needed). Ignore all other messages. + */ + return; + + } else { + pcmk__request_t request = { + .ipc_client = NULL, + .ipc_id = 0, + .ipc_flags = 0, + .peer = peer->uname, + .xml = xml, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); + CRM_CHECK(request.op != NULL, return); + + attrd_handle_request(&request); + + /* Having finished handling the request, check to see if the originating + * peer requested confirmation. If so, send that confirmation back now. 
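 * 
 * Editor's note: the confirmation built by attrd_confirmation() above 
 * echoes the request's XML_LRM_ATTR_CALLID, which is what lets the 
 * originating peer match each confirmation to its outstanding request. 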
+ */ + if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && + !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + int callid = 0; + xmlNode *reply = NULL; + + /* Add the confirmation ID for the message we are confirming to the + * response so the originating peer knows what they're a confirmation + * for. + */ + crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); + reply = attrd_confirmation(callid); + + /* And then send the confirmation back to the originating peer. This + * ends up right back in this same function (attrd_peer_message) on the + * peer where it will have to do something with a PCMK__XA_CONFIRM type + * message. + */ + crm_debug("Sending %s a confirmation", peer->uname); + attrd_send_message(peer, reply, false); + free_xml(reply); + } + + pcmk__reset_request(&request); + } +} + +static void +attrd_cpg_dispatch(cpg_handle_t handle, + const struct cpg_name *groupName, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + uint32_t kind = 0; + xmlNode *xml = NULL; + const char *from = NULL; + char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); + + if(data == NULL) { + return; + } + + if (kind == crm_class_cluster) { + xml = string2xml(data); + } + + if (xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { + crm_node_t *peer = crm_get_peer(nodeid, from); + + attrd_peer_message(peer, xml); + } + + free_xml(xml); + free(data); +} + +static void +attrd_cpg_destroy(gpointer unused) +{ + if (attrd_shutting_down()) { + crm_info("Corosync disconnection complete"); + + } else { + crm_crit("Lost connection to cluster layer, shutting down"); + attrd_exit_status = CRM_EX_DISCONNECT; + attrd_shutdown(0); + } +} + +/*! + * \internal + * \brief Override an attribute sync with a local value + * + * Broadcast the local node's value for an attribute that's different from the + * value provided in a peer's attribute synchronization response. This ensures a + * node's values for itself take precedence and all peers are kept in sync. + * + * \param[in] a Attribute entry to override + * + * \return Local instance of attribute value + */ +static attribute_value_t * +broadcast_local_value(const attribute_t *a) +{ + attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); + xmlNode *sync = create_xml_node(NULL, __func__); + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + attrd_add_value_xml(sync, a, v, false); + attrd_send_message(NULL, sync, false); + free_xml(sync); + return v; +} + +/*! + * \internal + * \brief Ensure a Pacemaker Remote node is in the correct peer cache + * + * \param[in] node_name Name of Pacemaker Remote node to check + */ +static void +cache_remote_node(const char *node_name) +{ + /* If we previously assumed this node was an unseen cluster node, + * remove its entry from the cluster peer cache. + */ + crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); + + if (dup && (dup->uuid == NULL)) { + reap_crm_member(0, node_name); + } + + // Ensure node is in the remote peer cache + CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +} + +#define state_text(state) pcmk__s((state), "in unknown state") + +/*! 
+ * \internal + * \brief Return host's hash table entry (creating one if needed) + * + * \param[in,out] values Hash table of values + * \param[in] host Name of peer to look up + * \param[in] xml XML describing the attribute + * + * \return Pointer to new or existing hash table entry + */ +static attribute_value_t * +attrd_lookup_or_create_value(GHashTable *values, const char *host, + const xmlNode *xml) +{ + attribute_value_t *v = g_hash_table_lookup(values, host); + int is_remote = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); + if (is_remote) { + cache_remote_node(host); + } + + if (v == NULL) { + v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(v != NULL); + + pcmk__str_update(&v->nodename, host); + v->is_remote = is_remote; + g_hash_table_replace(values, v->nodename, v); + } + return(v); +} + +static void +attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) +{ + bool gone = false; + bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); + + switch (kind) { + case crm_status_uname: + crm_debug("%s node %s is now %s", + (is_remote? "Remote" : "Cluster"), + peer->uname, state_text(peer->state)); + break; + + case crm_status_processes: + if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { + gone = true; + } + crm_debug("Node %s is %s a peer", + peer->uname, (gone? "no longer" : "now")); + break; + + case crm_status_nstate: + crm_debug("%s node %s is now %s (was %s)", + (is_remote? "Remote" : "Cluster"), + peer->uname, state_text(peer->state), state_text(data)); + if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { + /* If we're the writer, send new peers a list of all attributes + * (unless it's a remote node, which doesn't run its own attrd) + */ + if (attrd_election_won() + && !pcmk_is_set(peer->flags, crm_remote_node)) { + attrd_peer_sync(peer, NULL); + } + } else { + // Remove all attribute values associated with lost nodes + attrd_peer_remove(peer->uname, false, "loss"); + gone = true; + } + break; + } + + // Remove votes from cluster nodes that leave, in case election in progress + if (gone && !is_remote) { + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); + attrd_do_not_expect_from_peer(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { + cache_remote_node(peer->uname); + } +} + +static void +record_peer_nodeid(attribute_value_t *v, const char *host) +{ + crm_node_t *known_peer = crm_get_peer(v->nodeid, host); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { + attrd_write_attributes(false, false); + } +} + +static void +update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, + bool filter, int is_force_write) +{ + attribute_value_t *v = NULL; + + v = attrd_lookup_or_create_value(a->values, host, xml); + + if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) + && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", + attr, host, v->current, value, peer->uname); + v = broadcast_local_value(a); + + } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? 
" in " : "", + pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), + pcmk__s(value, "(unset)"), peer->uname, + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + pcmk__str_update(&v->current, value); + a->changed = true; + + if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) + && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { + + if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { + attrd_set_requesting_shutdown(); + + } else { + attrd_clear_requesting_shutdown(); + } + } + + // Write out new value or start dampening timer + if (a->timeout_ms && a->timer) { + crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); + mainloop_timer_start(a->timer); + } else { + attrd_write_or_elect_attribute(a); + } + + } else { + if (is_force_write == 1 && a->timeout_ms && a->timer) { + /* Save forced writing and set change flag. */ + /* The actual attribute is written by Writer after election. */ + crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", + attr, host, peer->uname, value); + a->force_write = TRUE; + } else { + crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); + } + } + + /* Set the seen flag for attribute processing held only in the own node. */ + v->seen = TRUE; + + /* If this is a cluster node whose node ID we are learning, remember it */ + if ((v->nodeid == 0) && (v->is_remote == FALSE) + && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, + (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { + record_peer_nodeid(v, host); + } +} + +static void +attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) +{ + attribute_t *a = NULL; + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + int is_force_write = 0; + + if (attr == NULL) { + crm_warn("Could not update attribute: peer did not specify name"); + return; + } + + crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); + + a = attrd_populate_attribute(xml, attr); + if (a == NULL) { + return; + } + + if (host == NULL) { + // If no host was specified, update all hosts + GHashTableIter vIter; + + crm_debug("Setting %s for all hosts to %s", attr, value); + xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); + g_hash_table_iter_init(&vIter, a->values); + + while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { + update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); + } + + } else { + // Update attribute value for the given host + update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); + } + + /* If this is a message from some attrd instance broadcasting its protocol + * version, check to see if it's a new minimum version. 
+ */ + if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { + attrd_update_minimum_protocol_ver(peer->uname, value); + } +} + +static void +broadcast_unseen_local_values(void) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a = NULL; + attribute_value_t *v = NULL; + xmlNode *sync = NULL; + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, + pcmk__str_casei)) { + if (sync == NULL) { + sync = create_xml_node(NULL, __func__); + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + } + attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); + } + } + } + + if (sync != NULL) { + crm_debug("Broadcasting local-only values"); + attrd_send_message(NULL, sync, false); + free_xml(sync); + } +} + +int +attrd_cluster_connect(void) +{ + attrd_cluster = pcmk_cluster_new(); + + attrd_cluster->destroy = attrd_cpg_destroy; + attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; + attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; + + crm_set_status_callback(&attrd_peer_change_cb); + + if (crm_cluster_connect(attrd_cluster) == FALSE) { + crm_err("Cluster connection failed"); + return -ENOTCONN; + } + return pcmk_ok; +} + +void +attrd_peer_clear_failure(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); + const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); + guint interval_ms = crm_parse_interval_spec(interval_spec); + char *attr = NULL; + GHashTableIter iter; + regex_t regex; + + crm_node_t *peer = crm_get_peer(0, request->peer); + + if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", + pcmk__s(rsc, "all resources")); + return; + } + + crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + + /* Make sure value is not set, so we delete */ + if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { + crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); + } + + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { + if (regexec(®ex, attr, 0, NULL, 0) == 0) { + crm_trace("Matched %s when clearing %s", + attr, pcmk__s(rsc, "all resources")); + crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); + attrd_peer_update(peer, xml, host, false); + } + } + regfree(®ex); +} + +/*! + * \internal + * \brief Load attributes from a peer sync response + * + * \param[in] peer Peer that sent clear request + * \param[in] peer_won Whether peer is the attribute writer + * \param[in,out] xml Request XML + */ +void +attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) +{ + crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", + peer->uname); + + if (peer_won) { + /* Initialize the "seen" flag for all attributes to cleared, so we can + * detect attributes that local node has but the writer doesn't. 
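 * 
 * Editor's note: the full reconciliation is three steps -- clear every 
 * local "seen" flag here, let the attrd_peer_update() calls below mark 
 * each value the writer reported (update_attr_on_host() sets v->seen), 
 * then have broadcast_unseen_local_values() push anything still unmarked 
 * back out to all peers. 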
+ */ + attrd_clear_value_seen(); + } + + // Process each attribute update in the sync response + for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { + attrd_peer_update(peer, child, + crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), + true); + } + + if (peer_won) { + /* If any attributes are still not marked as seen, the writer doesn't + * know about them, so send all peers an update with them. + */ + broadcast_unseen_local_values(); + } +} + +/*! + * \internal + * \brief Remove all attributes and optionally peer cache entries for a node + * + * \param[in] host Name of node to purge + * \param[in] uncache If true, remove node from peer caches + * \param[in] source Who requested removal (only used for logging) + */ +void +attrd_peer_remove(const char *host, bool uncache, const char *source) +{ + attribute_t *a = NULL; + GHashTableIter aIter; + + CRM_CHECK(host != NULL, return); + crm_notice("Removing all %s attributes for peer %s", host, source); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + if(g_hash_table_remove(a->values, host)) { + crm_debug("Removed %s[%s] for peer %s", a->id, host, source); + } + } + + if (uncache) { + crm_remote_peer_cache_remove(host); + reap_crm_member(0, host); + } +} + +void +attrd_peer_sync(crm_node_t *peer, xmlNode *xml) +{ + GHashTableIter aIter; + GHashTableIter vIter; + + attribute_t *a = NULL; + attribute_value_t *v = NULL; + xmlNode *sync = create_xml_node(NULL, __func__); + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); + attrd_add_value_xml(sync, a, v, false); + } + } + + crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); + attrd_send_message(peer, sync, false); + free_xml(sync); +} + +void +attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) +{ + bool handle_sync_point = false; + + if (xml_has_children(xml)) { + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + attrd_copy_xml_attributes(xml, child); + attrd_peer_update_one(peer, child, filter); + + if (attrd_request_has_sync_point(child)) { + handle_sync_point = true; + } + } + + } else { + attrd_peer_update_one(peer, xml, filter); + + if (attrd_request_has_sync_point(xml)) { + handle_sync_point = true; + } + } + + /* If the update XML specified that the client wanted to wait for a sync + * point, process that now. + */ + if (handle_sync_point) { + crm_trace("Hit local sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } +} diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c new file mode 100644 index 0000000..3b6b55a --- /dev/null +++ b/daemons/attrd/attrd_elections.c @@ -0,0 +1,179 @@ +/* + * Copyright 2013-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +static char *peer_writer = NULL; +static election_t *writer = NULL; + +static gboolean +attrd_election_cb(gpointer user_data) +{ + attrd_declare_winner(); + + /* Update the peers after an election */ + attrd_peer_sync(NULL, NULL); + + /* Update the CIB after an election */ + attrd_write_attributes(true, false); + return FALSE; +} + +void +attrd_election_init(void) +{ + writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, + attrd_election_cb); +} + +void +attrd_election_fini(void) +{ + election_fini(writer); +} + +void +attrd_start_election_if_needed(void) +{ + if ((peer_writer == NULL) + && (election_state(writer) != election_in_progress) + && !attrd_shutting_down()) { + + crm_info("Starting an election to determine the writer"); + election_vote(writer); + } +} + +bool +attrd_election_won(void) +{ + return (election_state(writer) == election_won); +} + +void +attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml) +{ + enum election_result rc = 0; + enum election_result previous = election_state(writer); + + crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); + + // Don't become writer if we're shutting down + rc = election_count_vote(writer, xml, !attrd_shutting_down()); + + switch(rc) { + case election_start: + crm_debug("Unsetting writer (was %s) and starting new election", + peer_writer? peer_writer : "unset"); + free(peer_writer); + peer_writer = NULL; + election_vote(writer); + break; + + case election_lost: + /* The election API should really distinguish between "we just lost + * to this peer" and "we already lost previously, and we are + * discarding this vote for some reason", but it doesn't. + * + * In the first case, we want to tentatively set the peer writer to + * this peer, even though another peer may eventually win (which we + * will learn via attrd_check_for_new_writer()), so + * attrd_start_election_if_needed() doesn't start a new election. + * + * Approximate a test for that case as best as possible. + */ + if ((peer_writer == NULL) || (previous != election_lost)) { + pcmk__str_update(&peer_writer, peer->uname); + crm_debug("Election lost, presuming %s is writer for now", + peer_writer); + } + break; + + case election_in_progress: + election_check(writer); + break; + + default: + crm_info("Ignoring election op from %s due to error", peer->uname); + break; + } +} + +bool +attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml) +{ + int peer_state = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state); + if (peer_state == election_won) { + if ((election_state(writer) == election_won) + && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { + crm_notice("Detected another attribute writer (%s), starting new election", + peer->uname); + election_vote(writer); + + } else if (!pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { + crm_notice("Recorded new attribute writer: %s (was %s)", + peer->uname, (peer_writer? peer_writer : "unset")); + pcmk__str_update(&peer_writer, peer->uname); + } + } + return (peer_state == election_won); +} + +void +attrd_declare_winner(void) +{ + crm_notice("Recorded local node as attribute writer (was %s)", + (peer_writer? 
peer_writer : "unset")); + pcmk__str_update(&peer_writer, attrd_cluster->uname); +} + +void +attrd_remove_voter(const crm_node_t *peer) +{ + election_remove(writer, peer->uname); + if (peer_writer && pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { + free(peer_writer); + peer_writer = NULL; + crm_notice("Lost attribute writer %s", peer->uname); + + /* Clear any election dampening in effect. Otherwise, if the lost writer + * had just won, the election could fizzle out with no new writer. + */ + election_clear_dampening(writer); + + /* If the writer received attribute updates during its shutdown, it will + * not have written them to the CIB. Ensure we get a new writer so they + * are written out. This means that every node that sees the writer + * leave will start a new election, but that's better than losing + * attributes. + */ + attrd_start_election_if_needed(); + + /* If an election is in progress, we need to call election_check(), in case + * this lost peer is the only one that hasn't voted, otherwise the election + * would be pending until it's timed out. + */ + } else if (election_state(writer) == election_in_progress) { + crm_debug("Checking election status upon loss of voter %s", peer->uname); + election_check(writer); + } +} + +void +attrd_xml_add_writer(xmlNode *xml) +{ + crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(writer)); +} diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c new file mode 100644 index 0000000..9d3dfff --- /dev/null +++ b/daemons/attrd/attrd_ipc.c @@ -0,0 +1,628 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +static qb_ipcs_service_t *ipcs = NULL; + +/*! + * \internal + * \brief Build the XML reply to a client query + * + * param[in] attr Name of requested attribute + * param[in] host Name of requested host (or NULL for all hosts) + * + * \return New XML reply + * \note Caller is responsible for freeing the resulting XML + */ +static xmlNode *build_query_reply(const char *attr, const char *host) +{ + xmlNode *reply = create_xml_node(NULL, __func__); + attribute_t *a; + + if (reply == NULL) { + return NULL; + } + crm_xml_add(reply, F_TYPE, T_ATTRD); + crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY); + crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + + /* If desired attribute exists, add its value(s) to the reply */ + a = g_hash_table_lookup(attributes, attr); + if (a) { + attribute_value_t *v; + xmlNode *host_value; + + crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); + + /* Allow caller to use "localhost" to refer to local node */ + if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { + host = attrd_cluster->uname; + crm_trace("Mapped localhost to %s", host); + } + + /* If a specific node was requested, add its value */ + if (host) { + v = g_hash_table_lookup(a->values, host); + host_value = create_xml_node(reply, XML_CIB_TAG_NODE); + if (host_value == NULL) { + free_xml(reply); + return NULL; + } + pcmk__xe_add_node(host_value, host, 0); + crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, + (v? 
v->current : NULL)); + + /* Otherwise, add all nodes' values */ + } else { + GHashTableIter iter; + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { + host_value = create_xml_node(reply, XML_CIB_TAG_NODE); + if (host_value == NULL) { + free_xml(reply); + return NULL; + } + pcmk__xe_add_node(host_value, v->nodename, 0); + crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); + } + } + } + return reply; +} + +xmlNode * +attrd_client_clear_failure(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *rsc, *op, *interval_spec; + + if (minimum_protocol_version >= 2) { + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). + */ + attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } + + rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); + op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); + interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); + + /* Map this to an update */ + crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + + /* Add regular expression matching desired attributes */ + + if (rsc) { + char *pattern; + + if (op == NULL) { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); + + } else { + guint interval_ms = crm_parse_interval_spec(interval_spec); + + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, + rsc, op, interval_ms); + } + + crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern); + free(pattern); + + } else { + crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL); + } + + /* Make sure attribute and value are not set, so we delete via regex */ + if (crm_element_value(xml, PCMK__XA_ATTR_NAME)) { + crm_xml_replace(xml, PCMK__XA_ATTR_NAME, NULL); + } + if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { + crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); + } + + return attrd_client_update(request); +} + +xmlNode * +attrd_client_peer_remove(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + + // Host and ID are not used in combination, rather host has precedence + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + char *host_alloc = NULL; + + attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); + + if (host == NULL) { + int nodeid = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); + if (nodeid > 0) { + crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL); + char *host_alloc = NULL; + + if (node && node->uname) { + // Use cached name if available + host = node->uname; + } else { + // Otherwise ask cluster layer + host_alloc = get_node_name(nodeid); + host = host_alloc; + } + pcmk__xe_add_node(xml, host, 0); + } + } + + if (host) { + crm_info("Client %s is requesting all values for %s be removed", + pcmk__client_name(request->ipc_client), host); + attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + free(host_alloc); + } else { + crm_info("Ignoring request by client %s to remove all peer values without specifying peer", + pcmk__client_name(request->ipc_client)); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +xmlNode * +attrd_client_query(pcmk__request_t *request) +{ + xmlNode *query = request->xml; + xmlNode *reply = NULL; + const char *attr = NULL; + + crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); + + /* Request must specify attribute name to query */ + attr = 
crm_element_value(query, PCMK__XA_ATTR_NAME); + if (attr == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Ignoring malformed query from %s (no attribute name given)", + pcmk__client_name(request->ipc_client)); + return NULL; + } + + /* Build the XML reply */ + reply = build_query_reply(attr, crm_element_value(query, + PCMK__XA_ATTR_NODE_NAME)); + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Could not respond to query from %s: could not create XML reply", + pcmk__client_name(request->ipc_client)); + return NULL; + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + request->ipc_client->request_id = 0; + return reply; +} + +xmlNode * +attrd_client_refresh(pcmk__request_t *request) +{ + crm_info("Updating all attributes"); + + attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); + attrd_write_attributes(true, true); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +static void +handle_missing_host(xmlNode *xml) +{ + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + + if (host == NULL) { + crm_trace("Inferring host"); + pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid); + } +} + +/* Convert a single IPC message with a regex into one with multiple children, one + * for each regex match. + */ +static int +expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) +{ + if (attr == NULL && regex) { + bool matched = false; + GHashTableIter aIter; + regex_t r_patt; + + crm_debug("Setting %s to %s", regex, value); + if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { + return EINVAL; + } + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { + int status = regexec(&r_patt, attr, 0, NULL, 0); + + if (status == 0) { + xmlNode *child = create_xml_node(xml, XML_ATTR_OP); + + crm_trace("Matched %s with %s", attr, regex); + matched = true; + + /* Copy all the attributes from the parent over, but remove the + * regex and replace it with the name. + */ + attrd_copy_xml_attributes(xml, child); + crm_xml_replace(child, PCMK__XA_ATTR_PATTERN, NULL); + crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); + } + } + + regfree(&r_patt); + + /* Return a code if we never matched anything. This should not be treated + * as an error. It indicates there was a regex, and it was a valid regex, + * but simply did not match anything and the caller should not continue + * doing any regex-related processing. 
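+ * + * For example, with a (hypothetical) pattern "^fail-count-" and known + * attributes "fail-count-rsc1" and "last-failure-rsc1", one <op> child + * would be created for "fail-count-rsc1" and pcmk_rc_ok returned, while + * the same pattern against only "last-failure-rsc1" would create no + * children and return pcmk_rc_op_unsatisfied.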
+ */ + if (!matched) { + return pcmk_rc_op_unsatisfied; + } + + } else if (attr == NULL) { + return pcmk_rc_bad_nvpair; + } + + return pcmk_rc_ok; +} + +static int +handle_regexes(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + int rc = pcmk_rc_ok; + + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); + + rc = expand_regexes(xml, attr, value, regex); + + if (rc == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Bad regex '%s' for update from client %s", regex, + pcmk__client_name(request->ipc_client)); + + } else if (rc == pcmk_rc_bad_nvpair) { + crm_err("Update request did not specify attribute or regular expression"); + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Client %s update request did not specify attribute or regular expression", + pcmk__client_name(request->ipc_client)); + } + + return rc; +} + +static int +handle_value_expansion(const char **value, xmlNode *xml, const char *op, + const char *attr) +{ + attribute_t *a = g_hash_table_lookup(attributes, attr); + + if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { + return EINVAL; + } + + if (*value && attrd_value_needs_expansion(*value)) { + int int_value; + attribute_value_t *v = NULL; + + if (a) { + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + v = g_hash_table_lookup(a->values, host); + } + + int_value = attrd_expand_value(*value, (v? v->current : NULL)); + + crm_info("Expanded %s=%s to %d", attr, *value, int_value); + crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); + + /* Replacing the value frees the previous memory, so re-query it */ + *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + } + + return pcmk_rc_ok; +} + +static void +send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) +{ + if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { + /* The client is waiting on the cluster-wide sync point. In this case, + * the response ACK is not sent until this attrd broadcasts the update + * and receives its own confirmation back from all peers. + */ + attrd_expect_confirmations(request, attrd_cluster_sync_point_update); + attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ + + } else { + /* The client is either waiting on the local sync point or was not + * waiting on any sync point at all. For the local sync point, the + * response ACK is sent in attrd_peer_update. For clients not + * waiting on any sync point, the response ACK is sent in + * handle_update_request immediately before this function was called. + */ + attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + } +} + +static int +send_child_update(xmlNode *child, void *data) +{ + pcmk__request_t *request = (pcmk__request_t *) data; + + /* Calling pcmk__set_result is handled by one of these calls to + * attrd_client_update, so no need to do it again here. + */ + request->xml = child; + attrd_client_update(request); + return pcmk_rc_ok; +} + +xmlNode * +attrd_client_update(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *attr, *value, *regex; + + /* If the message has children, that means it is a message from a newer + * client that supports sending multiple operations at a time. There are + * two ways we can handle that. 
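+ * + * 1. If every peer is new enough (ATTRD_SUPPORTS_MULTI_MESSAGE), transform + * the children in place and broadcast the whole message at once. + * 2. Otherwise, split it up and feed each child back through + * attrd_client_update() as its own request.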
+ */ + if (xml_has_children(xml)) { + if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { + /* First, if all peers support a certain protocol version, we can + * just broadcast the big message and they'll handle it. However, + * we also need to apply all the transformations in this function + * to the children since they don't happen anywhere else. + */ + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + attr = crm_element_value(child, PCMK__XA_ATTR_NAME); + value = crm_element_value(child, PCMK__XA_ATTR_VALUE); + + handle_missing_host(child); + + if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, + "Attribute %s does not exist", attr); + return NULL; + } + } + + send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { + /* Save the original xml node pointer so it can be restored after iterating + * over all the children. + */ + xmlNode *orig_xml = request->xml; + + /* Second, if they do not support that protocol version, split it + * up into individual messages and call attrd_client_update on + * each one. + */ + pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request); + request->xml = orig_xml; + } + + return NULL; + } + + attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); + + if (handle_regexes(request) != pcmk_rc_ok) { + /* Error handling was already dealt with in handle_regexes, so just return. */ + return NULL; + } else if (regex) { + /* Recursively call attrd_client_update on the new message with regexes + * expanded. If supported by the attribute daemon, this means that all + * matches can also be handled atomically. + */ + return attrd_client_update(request); + } + + handle_missing_host(xml); + + if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, + "Attribute %s does not exist", attr); + return NULL; + } + + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? " (writer)" : "")); + + send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +/*! + * \internal + * \brief Accept a new client IPC connection + * + * \param[in,out] c New connection + * \param[in] uid Client user id + * \param[in] gid Client group id + * + * \return pcmk_ok on success, -errno otherwise + */ +static int32_t +attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) +{ + crm_trace("New client connection %p", c); + if (attrd_shutting_down()) { + crm_info("Ignoring new connection from pid %d during shutdown", + pcmk__client_pid(c)); + return -EPERM; + } + + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return pcmk_ok; +} + +/*! + * \internal + * \brief Destroy a client IPC connection + * + * \param[in] c Connection to destroy + * + * \return FALSE (i.e. 
do not re-run this callback) + */ +static int32_t +attrd_ipc_closed(qb_ipcs_connection_t *c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + crm_trace("Ignoring request to clean up unknown connection %p", c); + } else { + crm_trace("Cleaning up closed client connection %p", c); + + /* Remove the client from the sync point waitlist if it's present. */ + attrd_remove_client_from_waitlist(client); + + /* And no longer wait for confirmations from any peers. */ + attrd_do_not_wait_for_client(client); + + pcmk__free_client(client); + } + + return FALSE; +} + +/*! + * \internal + * \brief Destroy a client IPC connection + * + * \param[in,out] c Connection to destroy + * + * \note We handle a destroyed connection the same as a closed one, + * but we need a separate handler because the return type is different. + */ +static void +attrd_ipc_destroy(qb_ipcs_connection_t *c) +{ + crm_trace("Destroying client connection %p", c); + attrd_ipc_closed(c); +} + +static int32_t +attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *client = pcmk__find_client(c); + xmlNode *xml = NULL; + + // Sanity-check, and parse XML from IPC data + CRM_CHECK((c != NULL) && (client != NULL), return 0); + if (data == NULL) { + crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); + return 0; + } + + xml = pcmk__client_data2xml(client, data, &id, &flags); + + if (xml == NULL) { + crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); + pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + + } else { + pcmk__request_t request = { + .ipc_client = client, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = xml, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + CRM_ASSERT(client->user != NULL); + pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); + + request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); + CRM_CHECK(request.op != NULL, return 0); + + attrd_handle_request(&request); + pcmk__reset_request(&request); + } + + free_xml(xml); + return 0; +} + +static struct qb_ipcs_service_handlers ipc_callbacks = { + .connection_accept = attrd_ipc_accept, + .connection_created = NULL, + .msg_process = attrd_ipc_dispatch, + .connection_closed = attrd_ipc_closed, + .connection_destroyed = attrd_ipc_destroy +}; + +void +attrd_ipc_fini(void) +{ + if (ipcs != NULL) { + pcmk__drop_all_clients(ipcs); + qb_ipcs_destroy(ipcs); + ipcs = NULL; + } +} + +/*! + * \internal + * \brief Set up attrd IPC communication + */ +void +attrd_init_ipc(void) +{ + pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); +} diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c new file mode 100644 index 0000000..184176a --- /dev/null +++ b/daemons/attrd/attrd_messages.c @@ -0,0 +1,328 @@ +/* + * Copyright 2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include + +#include +#include + +#include "pacemaker-attrd.h" + +int minimum_protocol_version = -1; + +static GHashTable *attrd_handlers = NULL; + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + crm_err("Unknown IPC request %s from %s %s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request)); + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown request type '%s' (bug?)", request->op); + return NULL; +} + +static xmlNode * +handle_clear_failure_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + /* It is not currently possible to receive this as a peer command, + * but will be, if we one day enable propagating this operation. + */ + attrd_peer_clear_failure(request); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + if (attrd_request_has_sync_point(request->xml)) { + /* If this client supplied a sync point it wants to wait for, add it to + * the wait list. Clients on this list will not receive an ACK until + * their sync point is hit which will result in the client stalled there + * until it receives a response. + * + * All other clients will receive the expected response as normal. + */ + attrd_add_client_to_waitlist(request); + + } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. + */ + attrd_send_ack(request->ipc_client, request->ipc_id, + request->ipc_flags); + } + + return attrd_client_clear_failure(request); + } +} + +static xmlNode * +handle_confirm_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + int callid; + + crm_debug("Received confirmation from %s", request->peer); + + if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Could not get callid from XML"); + } else { + attrd_handle_confirmation(callid, request->peer); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_flush_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + /* Ignore. The flush command was removed in 2.0.0 but may be + * received from peers running older versions. 
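+ * Acknowledging it with success keeps mixed-version clusters quiet + * during rolling upgrades, instead of logging a protocol error.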
+ */ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_query_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + return handle_unknown_request(request); + } else { + return attrd_client_query(request); + } +} + +static xmlNode * +handle_remove_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + attrd_peer_remove(host, true, request->peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return attrd_client_peer_remove(request); + } +} + +static xmlNode * +handle_refresh_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + return handle_unknown_request(request); + } else { + return attrd_client_refresh(request); + } +} + +static xmlNode * +handle_sync_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + crm_node_t *peer = crm_get_peer(0, request->peer); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_sync_response_request(pcmk__request_t *request) +{ + if (request->ipc_client != NULL) { + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { + crm_node_t *peer = crm_get_peer(0, request->peer); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { + attrd_peer_sync_response(peer, peer_won, request->xml); + } + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +} + +static xmlNode * +handle_update_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + crm_node_t *peer = crm_get_peer(0, request->peer); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + + } else { + if (attrd_request_has_sync_point(request->xml)) { + /* If this client supplied a sync point it wants to wait for, add it to + * the wait list. Clients on this list will not receive an ACK until + * their sync point is hit which will result in the client stalled there + * until it receives a response. + * + * All other clients will receive the expected response as normal. + */ + attrd_add_client_to_waitlist(request); + + } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. + * + * In the normal case, attrd_client_update can be called recursively which + * makes where to send the ACK tricky. Doing it here ensures the client + * only ever receives one. 
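+ * (For instance, a regex update is expanded and then resubmitted through + * attrd_client_update(), so an ACK sent from inside that function could + * go out more than once.)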
+ */ + attrd_send_ack(request->ipc_client, request->ipc_id, + request->flags|crm_ipc_client_response); + } + + return attrd_client_update(request); + } +} + +static void +attrd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, + { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, + { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, + { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, + { PCMK__ATTRD_CMD_QUERY, handle_query_request }, + { PCMK__ATTRD_CMD_REFRESH, handle_refresh_request }, + { PCMK__ATTRD_CMD_SYNC, handle_sync_request }, + { PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request }, + { PCMK__ATTRD_CMD_UPDATE, handle_update_request }, + { PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request }, + { PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request }, + { NULL, handle_unknown_request }, + }; + + attrd_handlers = pcmk__register_handlers(handlers); +} + +void +attrd_unregister_handlers(void) +{ + if (attrd_handlers != NULL) { + g_hash_table_destroy(attrd_handlers); + attrd_handlers = NULL; + } +} + +void +attrd_handle_request(pcmk__request_t *request) +{ + xmlNode *reply = NULL; + char *log_msg = NULL; + const char *reason = NULL; + + if (attrd_handlers == NULL) { + attrd_register_handlers(); + } + + reply = pcmk__process_request(request, attrd_handlers); + + if (reply != NULL) { + crm_log_xml_trace(reply, "Reply"); + + if (request->ipc_client != NULL) { + pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, + request->ipc_flags); + } else { + crm_err("Not sending CPG reply to client"); + } + + free_xml(reply); + } + + reason = request->result.exit_reason; + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request), + pcmk_exec_status_str(request->result.execution_status), + (reason == NULL)? "" : " (", + pcmk__s(reason, ""), + (reason == NULL)? "" : ")"); + + if (!pcmk__result_ok(&request->result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(request); +} + +/*! + \internal + \brief Broadcast private attribute for local node with protocol version +*/ +void +attrd_broadcast_protocol(void) +{ + xmlNode *attrd_op = create_xml_node(NULL, __func__); + + crm_xml_add(attrd_op, F_TYPE, T_ATTRD); + crm_xml_add(attrd_op, F_ORIG, crm_system_name); + crm_xml_add(attrd_op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL); + crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION); + crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); + pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid); + + crm_debug("Broadcasting attrd protocol version %s for node %s", + ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); + + attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ + + free_xml(attrd_op); +} + +gboolean +attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) +{ + const char *op = crm_element_value(data, PCMK__XA_TASK); + + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + + /* Request a confirmation from the destination peer node (which could + * be all if node is NULL) that the message has been received and + * acted upon. 
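+ * Confirmation messages themselves are never flagged this way, which + * keeps peers from endlessly confirming each other's confirmations.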
+ */ + if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); + } + + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); +} diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c new file mode 100644 index 0000000..d59ddd5 --- /dev/null +++ b/daemons/attrd/attrd_sync.c @@ -0,0 +1,577 @@ +/* + * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include "pacemaker-attrd.h" + +/* A hash table storing clients that are waiting on a sync point to be reached. + * The key is waitlist_client - just a plain int. The obvious key would be + * the IPC client's ID, but this is not guaranteed to be unique. A single client + * could be waiting on a sync point for multiple attributes at the same time. + * + * It is not expected that this hash table will ever be especially large. + */ +static GHashTable *waitlist = NULL; +static int waitlist_client = 0; + +struct waitlist_node { + /* What kind of sync point does this node describe? */ + enum attrd_sync_point sync_point; + + /* Information required to construct and send a reply to the client. */ + char *client_id; + uint32_t ipc_id; + uint32_t flags; +}; + +/* A hash table storing information on in-progress IPC requests that are awaiting + * confirmations. These requests are currently being processed by peer attrds and + * we are waiting to receive confirmation messages from each peer indicating that + * processing is complete. + * + * Multiple requests could be waiting on confirmations at the same time. + * + * The key is the unique callid for the IPC request, and the value is a + * confirmation_action struct. + */ +static GHashTable *expected_confirmations = NULL; + +/*! + * \internal + * \brief A structure describing a single IPC request that is awaiting confirmations + */ +struct confirmation_action { + /*! + * \brief A list of peer attrds that we are waiting to receive confirmation + * messages from + * + * This list is dynamic - as confirmations arrive from peer attrds, they will + * be removed from this list. When the list is empty, all peers have processed + * the request and the associated confirmation action will be taken. + */ + GList *respondents; + + /*! + * \brief A timer that will be used to remove the client should it time out + * before receiving all confirmations + */ + mainloop_timer_t *timer; + + /*! + * \brief A function to run when all confirmations have been received + */ + attrd_confirmation_action_fn fn; + + /*! + * \brief Information required to construct and send a reply to the client + */ + char *client_id; + uint32_t ipc_id; + uint32_t flags; + + /*! 
+ * \brief The XML request containing the callid associated with this action + */ + void *xml; +}; + +static void +next_key(void) +{ + do { + waitlist_client++; + if (waitlist_client < 0) { + waitlist_client = 1; + } + } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); +} + +static void +free_waitlist_node(gpointer data) +{ + struct waitlist_node *wl = (struct waitlist_node *) data; + + free(wl->client_id); + free(wl); +} + +static const char * +sync_point_str(enum attrd_sync_point sync_point) +{ + if (sync_point == attrd_sync_point_local) { + return PCMK__VALUE_LOCAL; + } else if (sync_point == attrd_sync_point_cluster) { + return PCMK__VALUE_CLUSTER; + } else { + return "unknown"; + } +} + +/*! + * \internal + * \brief Add a client to the attrd waitlist + * + * Typically, a client receives an ACK for its XML IPC request immediately. However, + * some clients want to wait until their request has been processed and taken effect. + * This is called a sync point. Any client placed on this waitlist will have its + * ACK message delayed until either its requested sync point is hit, or until it + * times out. + * + * The XML IPC request must specify the type of sync point it wants to wait for. + * + * \param[in,out] request The request describing the client to place on the waitlist. + */ +void +attrd_add_client_to_waitlist(pcmk__request_t *request) +{ + const char *sync_point = attrd_request_sync_point(request->xml); + struct waitlist_node *wl = NULL; + + if (sync_point == NULL) { + return; + } + + if (waitlist == NULL) { + waitlist = pcmk__intkey_table(free_waitlist_node); + } + + wl = calloc(sizeof(struct waitlist_node), 1); + + CRM_ASSERT(wl != NULL); + + wl->client_id = strdup(request->ipc_client->id); + + CRM_ASSERT(wl->client_id); + + if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { + wl->sync_point = attrd_sync_point_local; + } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { + wl->sync_point = attrd_sync_point_cluster; + } else { + free_waitlist_node(wl); + return; + } + + wl->ipc_id = request->ipc_id; + wl->flags = request->flags; + + next_key(); + pcmk__intkey_table_insert(waitlist, waitlist_client, wl); + + crm_trace("Added client %s to waitlist for %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + + /* And then add the key to the request XML so we can uniquely identify + * it when it comes time to issue the ACK. + */ + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); +} + +/*! + * \internal + * \brief Free all memory associated with the waitlist. This is most typically + * used when attrd shuts down. + */ +void +attrd_free_waitlist(void) +{ + if (waitlist == NULL) { + return; + } + + g_hash_table_destroy(waitlist); + waitlist = NULL; +} + +/*! 
+ * \internal + * \brief Unconditionally remove a client from the waitlist, such as when the client + * node disconnects from the cluster + * + * \param[in] client The client to remove + */ +void +attrd_remove_client_from_waitlist(pcmk__client_t *client) +{ + GHashTableIter iter; + gpointer value; + + if (waitlist == NULL) { + return; + } + + g_hash_table_iter_init(&iter, waitlist); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct waitlist_node *wl = (struct waitlist_node *) value; + + if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { + g_hash_table_iter_remove(&iter); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } +} + +/*! + * \internal + * \brief Send an IPC ACK message to all awaiting clients + * + * This function will search the waitlist for all clients that are currently awaiting + * an ACK indicating their attrd operation is complete. Only those clients with a + * matching sync point type and callid from their original XML IPC request will be + * ACKed. Once they have received an ACK, they will be removed from the waitlist. + * + * \param[in] sync_point What kind of sync point have we hit? + * \param[in] xml The original XML IPC request. + */ +void +attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) +{ + int callid; + gpointer value; + + if (waitlist == NULL) { + return; + } + + if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + crm_warn("Could not get callid from request XML"); + return; + } + + value = pcmk__intkey_table_lookup(waitlist, callid); + if (value != NULL) { + struct waitlist_node *wl = (struct waitlist_node *) value; + pcmk__client_t *client = NULL; + + if (wl->sync_point != sync_point) { + return; + } + + crm_notice("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + + client = pcmk__find_client_by_id(wl->client_id); + if (client == NULL) { + return; + } + + attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); + + /* And then remove the client so it doesn't get alerted again. */ + pcmk__intkey_table_remove(waitlist, callid); + + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } +} + +/*! + * \internal + * \brief Action to take when a cluster sync point is hit for a + * PCMK__ATTRD_CMD_UPDATE* message. + * + * \param[in] xml The request that should be passed along to + * attrd_ack_waitlist_clients. This should be the original + * IPC request containing the callid for this update message. + */ +int +attrd_cluster_sync_point_update(xmlNode *xml) +{ + crm_trace("Hit cluster sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Return the sync point attribute for an IPC request + * + * This function will check both the top-level element of \p xml for a sync + * point attribute, as well as all of its \p op children, if any. The latter + * is useful for newer versions of attrd that can put multiple IPC requests + * into a single message. + * + * \param[in] xml An XML IPC request + * + * \note It is assumed that if one child element has a sync point attribute, + * all will have a sync point attribute and they will all be the same + * sync point. No other configuration is supported. + * + * \return The sync point attribute of \p xml, or NULL if none. 
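+ * + * As a sketch (attribute and element names shown as their macros, not + * their literal string values), both of these shapes request a "local" + * sync point: + * <request PCMK__XA_ATTR_SYNC_POINT="local"/> + * <request> <XML_ATTR_OP PCMK__XA_ATTR_SYNC_POINT="local"/> ... </request>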
+ */ +const char * +attrd_request_sync_point(xmlNode *xml) +{ + if (xml_has_children(xml)) { + xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); + + if (child) { + return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); + } else { + return NULL; + } + + } else { + return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); + } +} + +/*! + * \internal + * \brief Does an IPC request contain any sync point attribute? + * + * \param[in] xml An XML IPC request + * + * \return true if there's a sync point attribute, false otherwise + */ +bool +attrd_request_has_sync_point(xmlNode *xml) +{ + return attrd_request_sync_point(xml) != NULL; +} + +static void +free_action(gpointer data) +{ + struct confirmation_action *action = (struct confirmation_action *) data; + g_list_free_full(action->respondents, free); + mainloop_timer_del(action->timer); + free_xml(action->xml); + free(action->client_id); + free(action); +} + +/* Remove an IPC request from the expected_confirmations table if the peer attrds + * don't respond before the timeout is hit. We set the timeout to 15s. The exact + * number isn't critical - we just want to make sure that the table eventually gets + * cleared of things that didn't complete. + */ +static gboolean +confirmation_timeout_cb(gpointer data) +{ + struct confirmation_action *action = (struct confirmation_action *) data; + + GHashTableIter iter; + gpointer value; + + if (expected_confirmations == NULL) { + return G_SOURCE_REMOVE; + } + + g_hash_table_iter_init(&iter, expected_confirmations); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + if (value == action) { + pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); + if (client == NULL) { + return G_SOURCE_REMOVE; + } + + crm_trace("Timed out waiting for confirmations for client %s", client->id); + pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, + "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); + + g_hash_table_iter_remove(&iter); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + break; + } + } + + return G_SOURCE_REMOVE; +} + +/*! + * \internal + * \brief When a peer disconnects from the cluster, no longer wait for its confirmation + * for any IPC action. If this peer is the last one being waited on, this will + * trigger the confirmation action. + * + * \param[in] host The disconnecting peer attrd's uname + */ +void +attrd_do_not_expect_from_peer(const char *host) +{ + GList *keys = NULL; + + if (expected_confirmations == NULL) { + return; + } + + keys = g_hash_table_get_keys(expected_confirmations); + + crm_trace("Removing peer %s from expected confirmations", host); + + for (GList *node = keys; node != NULL; node = node->next) { + int callid = *(int *) node->data; + attrd_handle_confirmation(callid, host); + } + + g_list_free(keys); +} + +/*! + * \internal + * \brief When a client disconnects from the cluster, no longer wait on confirmations + * for it. Because the peer attrds may still be processing the original IPC + * message, they may still send us confirmations. However, we will take no + * action on them. 
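+ * (This is safe because attrd_handle_confirmation() ignores any callid + * that no longer has an entry in expected_confirmations.)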
+ * + * \param[in] client The disconnecting client + */ +void +attrd_do_not_wait_for_client(pcmk__client_t *client) +{ + GHashTableIter iter; + gpointer value; + + if (expected_confirmations == NULL) { + return; + } + + g_hash_table_iter_init(&iter, expected_confirmations); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct confirmation_action *action = (struct confirmation_action *) value; + + if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { + crm_trace("Removing client %s from expected confirmations", client->id); + g_hash_table_iter_remove(&iter); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + break; + } + } +} + +/*! + * \internal + * \brief Register some action to be taken when IPC request confirmations are + * received + * + * When this function is called, a list of all peer attrds that support confirming + * requests is generated. As confirmations from these peer attrds are received, + * they are removed from this list. When the list is empty, the registered action + * will be called. + * + * \note This function should always be called before attrd_send_message is called + * to broadcast to the peers to ensure that we know what replies we are + * waiting on. Otherwise, it is possible the peer could finish and confirm + * before we know to expect it. + * + * \param[in] request The request that is awaiting confirmations + * \param[in] fn A function to be run after all confirmations are received + */ +void +attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) +{ + struct confirmation_action *action = NULL; + GHashTableIter iter; + gpointer host, ver; + GList *respondents = NULL; + int callid; + + if (expected_confirmations == NULL) { + expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); + } + + if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + crm_err("Could not get callid from xml"); + return; + } + + if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { + crm_err("Already waiting on confirmations for call id %d", callid); + return; + } + + g_hash_table_iter_init(&iter, peer_protocol_vers); + while (g_hash_table_iter_next(&iter, &host, &ver)) { + if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { + char *s = strdup((char *) host); + + CRM_ASSERT(s != NULL); + respondents = g_list_prepend(respondents, s); + } + } + + action = calloc(1, sizeof(struct confirmation_action)); + CRM_ASSERT(action != NULL); + + action->respondents = respondents; + action->fn = fn; + action->xml = copy_xml(request->xml); + + action->client_id = strdup(request->ipc_client->id); + CRM_ASSERT(action->client_id != NULL); + + action->ipc_id = request->ipc_id; + action->flags = request->flags; + + action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); + mainloop_timer_start(action->timer); + + pcmk__intkey_table_insert(expected_confirmations, callid, action); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); +} + +void +attrd_free_confirmations(void) +{ + if (expected_confirmations != NULL) { + g_hash_table_destroy(expected_confirmations); + expected_confirmations = NULL; + } +} + +/*! 
+ * \internal + * \brief Process a confirmation message from a peer attrd + * + * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is + * received from a peer attrd. If this is the last confirmation we are waiting + * on for a given operation, the registered action will be called. + * + * \param[in] callid The unique callid for the XML IPC request + * \param[in] host The confirming peer attrd's uname + */ +void +attrd_handle_confirmation(int callid, const char *host) +{ + struct confirmation_action *action = NULL; + GList *node = NULL; + + if (expected_confirmations == NULL) { + return; + } + + action = pcmk__intkey_table_lookup(expected_confirmations, callid); + if (action == NULL) { + return; + } + + node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); + + if (node == NULL) { + return; + } + + action->respondents = g_list_remove(action->respondents, node->data); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); + + if (action->respondents == NULL) { + action->fn(action->xml); + pcmk__intkey_table_remove(expected_confirmations, callid); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + } +} diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c new file mode 100644 index 0000000..7de8dd9 --- /dev/null +++ b/daemons/attrd/attrd_utils.c @@ -0,0 +1,362 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pacemaker-attrd.h" + +cib_t *the_cib = NULL; + +static bool requesting_shutdown = false; +static bool shutting_down = false; +static GMainLoop *mloop = NULL; + +/* A hash table storing information on the protocol version of each peer attrd. + * The key is the peer's uname, and the value is the protocol version number. + */ +GHashTable *peer_protocol_vers = NULL; + +/*! + * \internal + * \brief Set requesting_shutdown state + */ +void +attrd_set_requesting_shutdown(void) +{ + requesting_shutdown = true; +} + +/*! + * \internal + * \brief Clear requesting_shutdown state + */ +void +attrd_clear_requesting_shutdown(void) +{ + requesting_shutdown = false; +} + +/*! + * \internal + * \brief Check whether we're currently requesting shutdown + * + * \return true if requesting shutdown, false otherwise + */ +bool +attrd_requesting_shutdown(void) +{ + return requesting_shutdown; +} + +/*! + * \internal + * \brief Check whether we're currently shutting down + * + * \return true if shutting down, false otherwise + */ +bool +attrd_shutting_down(void) +{ + return shutting_down; +} + +/*! 
+ * \internal + * \brief Exit (using mainloop or not, as appropriate) + * + * \param[in] nsig Ignored + */ +void +attrd_shutdown(int nsig) +{ + // Tell various functions not to do anything + shutting_down = true; + + // Don't respond to signals while shutting down + mainloop_destroy_signal(SIGTERM); + mainloop_destroy_signal(SIGCHLD); + mainloop_destroy_signal(SIGPIPE); + mainloop_destroy_signal(SIGUSR1); + mainloop_destroy_signal(SIGUSR2); + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); + attrd_free_confirmations(); + + if (peer_protocol_vers != NULL) { + g_hash_table_destroy(peer_protocol_vers); + peer_protocol_vers = NULL; + } + + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible + * only if we get SIGTERM in brief windows at start-up and shutdown. + */ + crm_exit(CRM_EX_OK); + } else { + g_main_loop_quit(mloop); + g_main_loop_unref(mloop); + } +} + +/*! + * \internal + * \brief Create a main loop for attrd + */ +void +attrd_init_mainloop(void) +{ + mloop = g_main_loop_new(NULL, FALSE); +} + +/*! + * \internal + * \brief Run attrd main loop + */ +void +attrd_run_mainloop(void) +{ + g_main_loop_run(mloop); +} + +void +attrd_cib_disconnect(void) +{ + CRM_CHECK(the_cib != NULL, return); + the_cib->cmds->del_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); + the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); + cib__clean_up_connection(&the_cib); +} + +void +attrd_cib_replaced_cb(const char *event, xmlNode * msg) +{ + int change_section = cib_change_section_nodes | cib_change_section_status | cib_change_section_alerts; + + if (attrd_requesting_shutdown() || attrd_shutting_down()) { + return; + } + + crm_element_value_int(msg, F_CIB_CHANGE_SECTION, &change_section); + + if (attrd_election_won()) { + if (change_section & (cib_change_section_nodes | cib_change_section_status)) { + crm_notice("Updating all attributes after %s event", event); + attrd_write_attributes(true, false); + } + } + + if (change_section & cib_change_section_alerts) { + // Check for changes in alerts + mainloop_set_trigger(attrd_config_read); + } +} + +/* strlen("value") */ +#define plus_plus_len (5) + +/*! + * \internal + * \brief Check whether an attribute value should be expanded + * + * \param[in] value Attribute value to check + * + * \return true if value needs expansion, false otherwise + */ +bool +attrd_value_needs_expansion(const char *value) +{ + return ((strlen(value) >= (plus_plus_len + 2)) + && (value[plus_plus_len] == '+') + && ((value[plus_plus_len + 1] == '+') + || (value[plus_plus_len + 1] == '='))); +} + +/*! + * \internal + * \brief Expand an increment expression into an integer + * + * \param[in] value Attribute increment expression to expand + * \param[in] old_value Previous value of attribute + * + * \return Expanded value + */ +int +attrd_expand_value(const char *value, const char *old_value) +{ + int offset = 1; + int int_value = char2score(old_value); + + if (value[plus_plus_len + 1] != '+') { + const char *offset_s = value + (plus_plus_len + 2); + + offset = char2score(offset_s); + } + int_value += offset; + + if (int_value > INFINITY) { + int_value = INFINITY; + } + return int_value; +} + +/*!
+ * \internal + * \brief Create regular expression matching failure-related attributes + * + * \param[out] regex Where to store created regular expression + * \param[in] rsc Name of resource to clear (or NULL for all) + * \param[in] op Operation to clear if rsc is specified (or NULL for all) + * \param[in] interval_ms Interval of operation to clear if op is specified + * + * \return pcmk_ok on success, -EINVAL if arguments are invalid + * + * \note The caller is responsible for freeing the result with regfree(). + */ +int +attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, + guint interval_ms) +{ + char *pattern = NULL; + int rc; + + /* Create a pattern that matches desired attributes */ + + if (rsc == NULL) { + pattern = strdup(ATTRD_RE_CLEAR_ALL); + } else if (op == NULL) { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); + } else { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); + } + + /* Compile pattern into regular expression */ + crm_trace("Clearing attributes matching %s", pattern); + rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB); + free(pattern); + + return (rc == 0)? pcmk_ok : -EINVAL; +} + +void +attrd_free_attribute_value(gpointer data) +{ + attribute_value_t *v = data; + + free(v->nodename); + free(v->current); + free(v->requested); + free(v); +} + +void +attrd_free_attribute(gpointer data) +{ + attribute_t *a = data; + if(a) { + free(a->id); + free(a->set_id); + free(a->set_type); + free(a->uuid); + free(a->user); + + mainloop_timer_del(a->timer); + g_hash_table_destroy(a->values); + + free(a); + } +} + +/*! + * \internal + * \brief When a peer node leaves the cluster, stop tracking its protocol version. + * + * \param[in] host The peer node's uname to be removed + */ +void +attrd_remove_peer_protocol_ver(const char *host) +{ + if (peer_protocol_vers != NULL) { + g_hash_table_remove(peer_protocol_vers, host); + } +} + +/*! + * \internal + * \brief When a peer node broadcasts a message with its protocol version, keep + * track of that information. + * + * We keep track of each peer's protocol version so we know which peers to + * expect confirmation messages from when handling cluster-wide sync points. + * We additionally keep track of the lowest protocol version supported by all + * peers so we know when we can send IPC messages containing more than one + * request. + * + * \param[in] host The peer node's uname to be tracked + * \param[in] value The peer node's protocol version + */ +void +attrd_update_minimum_protocol_ver(const char *host, const char *value) +{ + int ver; + + if (peer_protocol_vers == NULL) { + peer_protocol_vers = pcmk__strkey_table(free, NULL); + } + + pcmk__scan_min_int(value, &ver, 0); + + if (ver > 0) { + char *host_name = strdup(host); + + /* Record the peer attrd's protocol version. */ + CRM_ASSERT(host_name != NULL); + g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); + + /* If the protocol version is a new minimum, record it as such. */ + if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { + minimum_protocol_version = ver; + crm_trace("Set minimum attrd protocol version to %d", + minimum_protocol_version); + } + } +} + +void +attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest) +{ + /* Copy attributes from the wrapper parent node into the child node. + * We can't just use copy_in_properties because we want to skip any + * attributes that are already set on the child. 
For instance, if + * we were told to use a specific node, there will already be a node + * attribute on the child. Copying the parent's node attribute over + * could result in the wrong value. + */ + for (xmlAttrPtr a = pcmk__xe_first_attr(src); a != NULL; a = a->next) { + const char *p_name = (const char *) a->name; + const char *p_value = ((a == NULL) || (a->children == NULL)) ? NULL : + (const char *) a->children->content; + + if (crm_element_value(dest, p_name) == NULL) { + crm_xml_add(dest, p_name, p_value); + } + } +} diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c new file mode 100644 index 0000000..037825b --- /dev/null +++ b/daemons/attrd/pacemaker-attrd.c @@ -0,0 +1,358 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pacemaker-attrd.h" + +#define SUMMARY "daemon for managing Pacemaker node attributes" + +gboolean stand_alone = FALSE; +gchar **log_files = NULL; + +static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, + + { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, + &log_files, "Send logs to the additional named logfile", NULL }, + + { NULL } +}; + +static pcmk__output_t *out = NULL; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +lrmd_t *the_lrmd = NULL; +crm_cluster_t *attrd_cluster = NULL; +crm_trigger_t *attrd_config_read = NULL; +crm_exit_t attrd_exit_status = CRM_EX_OK; + +static void +attrd_cib_destroy_cb(gpointer user_data) +{ + cib_t *conn = user_data; + + conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ + + if (attrd_shutting_down()) { + crm_info("Connection disconnection complete"); + + } else { + /* eventually this should trigger a reconnect, not a shutdown */ + crm_crit("Lost connection to the CIB manager, shutting down"); + attrd_exit_status = CRM_EX_DISCONNECT; + attrd_shutdown(0); + } + + return; +} + +static void +attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), + "Cleared transient attributes: %s " + CRM_XS " xpath=%s rc=%d", + pcmk_strerror(rc), (char *) user_data, rc); +} + +#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS + +/*! + * \internal + * \brief Wipe all transient attributes for this node from the CIB + * + * Clear any previous transient node attributes from the CIB. This is + * normally done by the DC's controller when this node leaves the cluster, but + * this handles the case where the node restarted so quickly that the + * cluster layer didn't notice. + * + * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), + * ideally we'd skip this and sync our attributes from the writer. + * However, currently we reject any values for us that the writer has, in + * attrd_peer_update(). 
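+ * + * The erasure itself is a single asynchronous cib_xpath removal, using + * XPATH_TRANSIENT expanded with the local node name; attrd_erase_cb() + * merely logs the outcome.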
+ */ +static void +attrd_erase_attrs(void) +{ + int call_id; + char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); + + crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", + xpath); + + call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, + "attrd_erase_cb", attrd_erase_cb, + free); +} + +static int +attrd_cib_connect(int max_retry) +{ + static int attempts = 0; + + int rc = -ENOTCONN; + + the_cib = cib_new(); + if (the_cib == NULL) { + return -ENOTCONN; + } + + do { + if(attempts > 0) { + sleep(attempts); + } + + attempts++; + crm_debug("Connection attempt %d to the CIB manager", attempts); + rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); + + } while(rc != pcmk_ok && attempts < max_retry); + + if (rc != pcmk_ok) { + crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + goto cleanup; + } + + crm_debug("Connected to the CIB manager after %d attempts", attempts); + + rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); + if (rc != pcmk_ok) { + crm_err("Could not set disconnection callback"); + goto cleanup; + } + + rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); + if(rc != pcmk_ok) { + crm_err("Could not set CIB notification callback"); + goto cleanup; + } + + rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); + if (rc != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + goto cleanup; + } + + return pcmk_ok; + + cleanup: + cib__clean_up_connection(&the_cib); + return -ENOTCONN; +} + +/*! + * \internal + * \brief Prepare the CIB after cluster is connected + */ +static void +attrd_cib_init(void) +{ + // We have no attribute values in memory, wipe the CIB to match + attrd_erase_attrs(); + + // Set a trigger for reading the CIB (for the alerts section) + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); + + // Always read the CIB at start-up + mainloop_set_trigger(attrd_config_read); +} + +static bool +ipc_already_running(void) +{ + pcmk_ipc_api_t *old_instance = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd); + if (rc != pcmk_rc_ok) { + return false; + } + + rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + if (rc != pcmk_rc_ok) { + pcmk_free_ipc_api(old_instance); + return false; + } + + pcmk_disconnect_ipc(old_instance); + pcmk_free_ipc_api(old_instance); + return true; +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + + GError *error = NULL; + bool initialized = false; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + + attrd_init_mainloop(); + crm_log_preinit(NULL, argc, argv); + mainloop_add_signal(SIGTERM, attrd_shutdown); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + attrd_exit_status = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, 
args->output_ty, args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) { + attrd_exit_status = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + // Open additional log files + pcmk__add_logfiles(log_files, out); + + crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker node attribute manager%s", + stand_alone ? " in standalone mode" : ""); + + if (ipc_already_running()) { + const char *msg = "pacemaker-attrd is already active, aborting startup"; + + attrd_exit_status = CRM_EX_OK; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg); + crm_err("%s", msg); + goto done; + } + + initialized = true; + + attributes = pcmk__strkey_table(NULL, attrd_free_attribute); + + /* Connect to the CIB before connecting to the cluster or listening for IPC. + * This allows us to assume the CIB is connected whenever we process a + * cluster or IPC message (which also avoids start-up race conditions). + */ + if (!stand_alone) { + if (attrd_cib_connect(30) != pcmk_ok) { + attrd_exit_status = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Could not connect to the CIB"); + goto done; + } + crm_info("CIB connection active"); + } + + if (attrd_cluster_connect() != pcmk_ok) { + attrd_exit_status = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Could not connect to the cluster"); + goto done; + } + crm_info("Cluster connection active"); + + // Initialization that requires the cluster to be connected + attrd_election_init(); + + if (!stand_alone) { + attrd_cib_init(); + } + + /* Set a private attribute for ourselves with the protocol version we + * support. This lets all nodes determine the minimum supported version + * across all nodes. It also ensures that the writer learns our node name, + * so it can send our attributes to the CIB. + */ + attrd_broadcast_protocol(); + + attrd_init_ipc(); + crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); + attrd_run_mainloop(); + + done: + if (initialized) { + crm_info("Shutting down attribute manager"); + + attrd_election_fini(); + attrd_ipc_fini(); + attrd_lrmd_disconnect(); + + if (!stand_alone) { + attrd_cib_disconnect(); + } + + attrd_free_waitlist(); + pcmk_cluster_free(attrd_cluster); + g_hash_table_destroy(attributes); + } + + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + g_strfreev(log_files); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, attrd_exit_status, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(attrd_exit_status); +} diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h new file mode 100644 index 0000000..329fb5a --- /dev/null +++ b/daemons/attrd/pacemaker-attrd.h @@ -0,0 +1,216 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */ + +#ifndef PACEMAKER_ATTRD__H +# define PACEMAKER_ATTRD__H + +#include +#include +#include +#include +#include +#include +#include + +/* + * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used + * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. + * + * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every + * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will + * also set a private attribute for itself with its version, so any attrd can + * determine the minimum version supported by all peers. + * + * Protocol Pacemaker Significant changes + * -------- --------- ------------------- + * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), + * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, + * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, + * PCMK__ATTRD_CMD_SYNC_RESPONSE + * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), + * PCMK__ATTRD_CMD_QUERY + * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, + * PCMK__ATTRD_CMD_UPDATE_DELAY + * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE + * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes + * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message + * 5 2.1.5 Peers can request confirmation of a sent message + */ +#define ATTRD_PROTOCOL_VERSION "5" + +#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) +#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) + +#define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) + +void attrd_init_mainloop(void); +void attrd_run_mainloop(void); + +void attrd_set_requesting_shutdown(void); +void attrd_clear_requesting_shutdown(void); +void attrd_free_waitlist(void); +bool attrd_requesting_shutdown(void); +bool attrd_shutting_down(void); +void attrd_shutdown(int nsig); +void attrd_init_ipc(void); +void attrd_ipc_fini(void); + +void attrd_cib_disconnect(void); + +bool attrd_value_needs_expansion(const char *value); +int attrd_expand_value(const char *value, const char *old_value); + +/* regular expression to clear failures of all resources */ +#define ATTRD_RE_CLEAR_ALL \ + "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" + +/* regular expression to clear failure of all operations for one resource + * (format takes resource name) + * + * @COMPAT attributes set < 1.1.17: + * also match older attributes that do not have the operation part + */ +#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" + +/* regular expression to clear failure of one operation for one resource + * (format takes resource name, operation name, and interval) + * + * @COMPAT attributes set < 1.1.17: + * also match older attributes that do not have the operation part + */ +#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" + +int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, + guint interval_ms); + +extern cib_t *the_cib; + +/* Alerts */ + +extern lrmd_t *the_lrmd; +extern crm_trigger_t *attrd_config_read; + +void attrd_lrmd_disconnect(void); +gboolean attrd_read_options(gpointer user_data); +void attrd_cib_replaced_cb(const char *event, xmlNode * msg); +void attrd_cib_updated_cb(const char *event, xmlNode *msg); +int attrd_send_attribute_alert(const char *node, int nodeid, + const char *attr, const char *value); + +// Elections +void attrd_election_init(void); +void attrd_election_fini(void); +void attrd_start_election_if_needed(void); +bool attrd_election_won(void); 
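/* Editorial sketch (annotation, not part of the upstream header): the
 * ATTRD_SUPPORTS_* macros defined above are how a sender decides what a peer
 * can accept, per the protocol table at the top of this file. The helper
 * names and the peer_version parameter below are illustrative only.
 */
static inline bool
example_peer_accepts_multi_update(int peer_version)
{
    // Protocol 4 (Pacemaker 2.1.5): several attributes in one IPC message
    return ATTRD_SUPPORTS_MULTI_MESSAGE(peer_version);
}

static inline bool
example_peer_accepts_confirmation_request(int peer_version)
{
    // Protocol 5 (Pacemaker 2.1.5): sender may ask the peer to confirm receipt
    return ATTRD_SUPPORTS_CONFIRMATION(peer_version);
}

/* Likewise, assuming the usual "fail-count"/"last-failure" strings behind
 * PCMK__FAIL_COUNT_PREFIX and PCMK__LAST_FAILURE_PREFIX, ATTRD_RE_CLEAR_ONE
 * formatted with resource "myrsc" would expand to
 * "^(fail-count|last-failure)-myrsc(#.+_[0-9]+)?$", matching attributes such
 * as "fail-count-myrsc#start_0".
 */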
+void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); +bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); +void attrd_declare_winner(void); +void attrd_remove_voter(const crm_node_t *peer); +void attrd_xml_add_writer(xmlNode *xml); + +typedef struct attribute_s { + char *uuid; /* TODO: Remove if at all possible */ + char *id; + char *set_id; + char *set_type; + GHashTable *values; + int update; + int timeout_ms; + + /* TODO: refactor these three as a bitmask */ + bool changed; /* whether attribute value has changed since last write */ + bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ + gboolean is_private; /* whether to keep this attribute out of the CIB */ + + mainloop_timer_t *timer; + + char *user; + + gboolean force_write; /* Flag for updating attribute by ignoring delay */ + +} attribute_t; + +typedef struct attribute_value_s { + uint32_t nodeid; + gboolean is_remote; + char *nodename; + char *current; + char *requested; + gboolean seen; +} attribute_value_t; + +extern crm_cluster_t *attrd_cluster; +extern GHashTable *attributes; +extern GHashTable *peer_protocol_vers; + +#define CIB_OP_TIMEOUT_S 120 + +int attrd_cluster_connect(void); +void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter); +void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); +void attrd_peer_remove(const char *host, bool uncache, const char *source); +void attrd_peer_clear_failure(pcmk__request_t *request); +void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, + xmlNode *xml); + +void attrd_broadcast_protocol(void); +xmlNode *attrd_client_peer_remove(pcmk__request_t *request); +xmlNode *attrd_client_clear_failure(pcmk__request_t *request); +xmlNode *attrd_client_update(pcmk__request_t *request); +xmlNode *attrd_client_refresh(pcmk__request_t *request); +xmlNode *attrd_client_query(pcmk__request_t *request); +gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); + +xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); +void attrd_clear_value_seen(void); +void attrd_free_attribute(gpointer data); +void attrd_free_attribute_value(gpointer data); +attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); + +void attrd_write_attribute(attribute_t *a, bool ignore_delay); +void attrd_write_attributes(bool all, bool ignore_delay); +void attrd_write_or_elect_attribute(attribute_t *a); + +extern int minimum_protocol_version; +void attrd_remove_peer_protocol_ver(const char *host); +void attrd_update_minimum_protocol_ver(const char *host, const char *value); + +mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); + +void attrd_unregister_handlers(void); +void attrd_handle_request(pcmk__request_t *request); + +enum attrd_sync_point { + attrd_sync_point_local, + attrd_sync_point_cluster, +}; + +typedef int (*attrd_confirmation_action_fn)(xmlNode *); + +void attrd_add_client_to_waitlist(pcmk__request_t *request); +void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); +int attrd_cluster_sync_point_update(xmlNode *xml); +void attrd_do_not_expect_from_peer(const char *host); +void attrd_do_not_wait_for_client(pcmk__client_t *client); +void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); +void attrd_free_confirmations(void); +void attrd_handle_confirmation(int callid, const char *host); +void 
attrd_remove_client_from_waitlist(pcmk__client_t *client); +const char *attrd_request_sync_point(xmlNode *xml); +bool attrd_request_has_sync_point(xmlNode *xml); + +void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest); + +extern gboolean stand_alone; + +#endif /* PACEMAKER_ATTRD__H */ diff --git a/daemons/based/Makefile.am b/daemons/based/Makefile.am new file mode 100644 index 0000000..053d93c --- /dev/null +++ b/daemons/based/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright 2004-2021 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk + +EXTRA_DIST = cib.pam + +halibdir = $(CRM_DAEMON_DIR) + +COMMONLIBS = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/cib/libcib.la + +halib_PROGRAMS = pacemaker-based + +noinst_HEADERS = pacemaker-based.h + +pacemaker_based_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_based_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_based_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(COMMONLIBS) $(CLUSTERLIBS) + +pacemaker_based_SOURCES = pacemaker-based.c \ + based_callbacks.c \ + based_common.c \ + based_io.c \ + based_messages.c \ + based_notify.c \ + based_remote.c + +clean-generic: + rm -f *.log *.debug *.xml *~ + +if BUILD_LEGACY_LINKS +install-exec-hook: + $(MKDIR_P) -- $(DESTDIR)$(CRM_DAEMON_DIR) + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib && $(LN_S) pacemaker-based cib + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib +endif diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c new file mode 100644 index 0000000..3726caa --- /dev/null +++ b/daemons/based/based_callbacks.c @@ -0,0 +1,1696 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include // uint32_t, uint64_t, UINT64_C() +#include +#include +#include // PRIu64 + +#include +#include +#include +#include + +#include +#include + +#include + +#define EXIT_ESCALATION_MS 10000 +#define OUR_NODENAME (stand_alone? 
"localhost" : crm_cluster->uname) + +static unsigned long cib_local_bcast_num = 0; + +typedef struct cib_local_notify_s { + xmlNode *notify_src; + char *client_id; + gboolean from_peer; + gboolean sync_reply; +} cib_local_notify_t; + +int next_client_id = 0; + +gboolean legacy_mode = FALSE; + +qb_ipcs_service_t *ipcs_ro = NULL; +qb_ipcs_service_t *ipcs_rw = NULL; +qb_ipcs_service_t *ipcs_shm = NULL; + +static void cib_process_request(xmlNode *request, gboolean privileged, + const pcmk__client_t *cib_client); + +static int cib_process_command(xmlNode *request, xmlNode **reply, + xmlNode **cib_diff, gboolean privileged); + +static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, + size_t size, gboolean privileged); + +gboolean +cib_legacy_mode(void) +{ + return legacy_mode; +} + +static int32_t +cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + if (cib_shutdown_flag) { + crm_info("Ignoring new IPC client [%d] during shutdown", + pcmk__client_pid(c)); + return -EPERM; + } + + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return 0; +} + +static int32_t +cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) +{ + pcmk__client_t *client = pcmk__find_client(c); + + crm_trace("%p message from %s", c, client->id); + return cib_common_callback(c, data, size, TRUE); +} + +static int32_t +cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) +{ + pcmk__client_t *client = pcmk__find_client(c); + + crm_trace("%p message from %s", c, client->id); + return cib_common_callback(c, data, size, FALSE); +} + +/* Error code means? */ +static int32_t +cib_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + crm_trace("Connection %p", c); + pcmk__free_client(client); + return 0; +} + +static void +cib_ipc_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p", c); + cib_ipc_closed(c); + if (cib_shutdown_flag) { + cib_shutdown(0); + } +} + +struct qb_ipcs_service_handlers ipc_ro_callbacks = { + .connection_accept = cib_ipc_accept, + .connection_created = NULL, + .msg_process = cib_ipc_dispatch_ro, + .connection_closed = cib_ipc_closed, + .connection_destroyed = cib_ipc_destroy +}; + +struct qb_ipcs_service_handlers ipc_rw_callbacks = { + .connection_accept = cib_ipc_accept, + .connection_created = NULL, + .msg_process = cib_ipc_dispatch_rw, + .connection_closed = cib_ipc_closed, + .connection_destroyed = cib_ipc_destroy +}; + +void +cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, + pcmk__client_t *cib_client, gboolean privileged) +{ + const char *op = crm_element_value(op_request, F_CIB_OPERATION); + + if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { + if (flags & crm_ipc_client_response) { + xmlNode *ack = create_xml_node(NULL, __func__); + + crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); + crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); + pcmk__ipc_send_xml(cib_client, id, ack, flags); + cib_client->request_id = 0; + free_xml(ack); + } + return; + + } else if (pcmk__str_eq(op, T_CIB_NOTIFY, pcmk__str_none)) { + /* Update the notify filters for this client */ + int on_off = 0; + crm_exit_t status = CRM_EX_OK; + uint64_t bit = UINT64_C(0); + const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); + + crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); + + crm_debug("Setting %s callbacks %s for client %s", + type, (on_off? 
"on" : "off"), pcmk__client_name(cib_client)); + + if (pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) { + bit = cib_notify_post; + + } else if (pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) { + bit = cib_notify_pre; + + } else if (pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) { + bit = cib_notify_confirm; + + } else if (pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) { + bit = cib_notify_diff; + + } else if (pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { + bit = cib_notify_replace; + + } else { + status = CRM_EX_INVALID_PARAM; + } + + if (bit != 0) { + if (on_off) { + pcmk__set_client_flags(cib_client, bit); + } else { + pcmk__clear_client_flags(cib_client, bit); + } + } + + pcmk__ipc_send_ack(cib_client, id, flags, "ack", NULL, status); + return; + } + + cib_process_request(op_request, privileged, cib_client); +} + +int32_t +cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) +{ + uint32_t id = 0; + uint32_t flags = 0; + int call_options = 0; + pcmk__client_t *cib_client = pcmk__find_client(c); + xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); + + if (op_request) { + crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); + } + + if (op_request == NULL) { + crm_trace("Invalid message from %p", c); + pcmk__ipc_send_ack(cib_client, id, flags, "nack", NULL, CRM_EX_PROTOCOL); + return 0; + + } else if(cib_client == NULL) { + crm_trace("Invalid client %p", c); + return 0; + } + + if (pcmk_is_set(call_options, cib_sync_call)) { + CRM_LOG_ASSERT(flags & crm_ipc_client_response); + CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ + cib_client->request_id = id; /* Reply only to the last one */ + } + + if (cib_client->name == NULL) { + const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); + + if (value == NULL) { + cib_client->name = pcmk__itoa(cib_client->pid); + } else { + cib_client->name = strdup(value); + if (crm_is_daemon_name(value)) { + pcmk__set_client_flags(cib_client, cib_is_daemon); + } + } + } + + /* Allow cluster daemons more leeway before being evicted */ + if (pcmk_is_set(cib_client->flags, cib_is_daemon)) { + const char *qmax = cib_config_lookup("cluster-ipc-limit"); + + if (pcmk__set_client_queue_max(cib_client, qmax)) { + crm_trace("IPC threshold for client %s[%u] is now %u", + pcmk__client_name(cib_client), cib_client->pid, + cib_client->queue_max); + } + } + + crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); + crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); + + CRM_LOG_ASSERT(cib_client->user != NULL); + pcmk__update_acl_user(op_request, F_CIB_USER, cib_client->user); + + cib_common_callback_worker(id, flags, op_request, cib_client, privileged); + free_xml(op_request); + + return 0; +} + +static uint64_t ping_seq = 0; +static char *ping_digest = NULL; +static bool ping_modified_since = FALSE; + +static gboolean +cib_digester_cb(gpointer data) +{ + if (based_is_primary) { + char buffer[32]; + xmlNode *ping = create_xml_node(NULL, "ping"); + + ping_seq++; + free(ping_digest); + ping_digest = NULL; + ping_modified_since = FALSE; + snprintf(buffer, 32, "%" PRIu64, ping_seq); + crm_trace("Requesting peer digests (%s)", buffer); + + crm_xml_add(ping, F_TYPE, "cib"); + crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING); + crm_xml_add(ping, F_CIB_PING_ID, buffer); + + crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + send_cluster_message(NULL, 
crm_msg_cib, ping, TRUE); + + free_xml(ping); + } + return FALSE; +} + +static void +process_ping_reply(xmlNode *reply) +{ + uint64_t seq = 0; + const char *host = crm_element_value(reply, F_ORIG); + + xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA); + const char *seq_s = crm_element_value(pong, F_CIB_PING_ID); + const char *digest = crm_element_value(pong, XML_ATTR_DIGEST); + + if (seq_s == NULL) { + crm_debug("Ignoring ping reply with no " F_CIB_PING_ID); + return; + + } else { + long long seq_ll; + + if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) { + return; + } + seq = (uint64_t) seq_ll; + } + + if(digest == NULL) { + crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); + + } else if(seq != ping_seq) { + crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); + + } else if(ping_modified_since) { + crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); + + } else { + const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION); + + if(ping_digest == NULL) { + crm_trace("Calculating new digest"); + ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); + } + + crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); + if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) { + xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA); + + crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", + crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(the_cib, XML_ATTR_GENERATION), + crm_element_value(the_cib, XML_ATTR_NUMUPDATES), + ping_digest, host, + remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_", + remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_", + remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_", + digest, remote_cib); + + if(remote_cib && remote_cib->children) { + // Additional debug + xml_calculate_changes(the_cib, remote_cib); + + pcmk__output_set_log_level(logger_out, LOG_INFO); + pcmk__xml_show_changes(logger_out, remote_cib); + crm_trace("End of differences"); + } + + free_xml(remote_cib); + sync_our_cib(reply, FALSE); + } + } +} + +static void +do_local_notify(xmlNode * notify_src, const char *client_id, + gboolean sync_reply, gboolean from_peer) +{ + int rid = 0; + int call_id = 0; + pcmk__client_t *client_obj = NULL; + + CRM_ASSERT(notify_src && client_id); + + crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); + + client_obj = pcmk__find_client_by_id(client_id); + if (client_obj == NULL) { + crm_debug("Could not send response %d: client %s not found", + call_id, client_id); + return; + } + + if (sync_reply) { + if (client_obj->ipcs) { + CRM_LOG_ASSERT(client_obj->request_id); + + rid = client_obj->request_id; + client_obj->request_id = 0; + + crm_trace("Sending response %d to client %s%s", + rid, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } else { + crm_trace("Sending response (call %d) to client %s%s", + call_id, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } + + } else { + crm_trace("Sending event %d to client %s%s", + call_id, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } + + switch (PCMK__CLIENT_TYPE(client_obj)) { + case pcmk__client_ipc: + { + int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, + (sync_reply? 
crm_ipc_flags_none + : crm_ipc_server_event)); + + if (rc != pcmk_rc_ok) { + crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", + (sync_reply? "Synchronous" : "Asynchronous"), + pcmk__client_name(client_obj), pcmk_rc_str(rc), + rc); + } + } + break; +#ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: +#endif + case pcmk__client_tcp: + pcmk__remote_send_xml(client_obj->remote, notify_src); + break; + default: + crm_err("Unknown transport for client %s " + CRM_XS " flags=%#016" PRIx64, + pcmk__client_name(client_obj), client_obj->flags); + } +} + +static void +local_notify_destroy_callback(gpointer data) +{ + cib_local_notify_t *notify = data; + + free_xml(notify->notify_src); + free(notify->client_id); + free(notify); +} + +static void +check_local_notify(int bcast_id) +{ + cib_local_notify_t *notify = NULL; + + if (!local_notify_queue) { + return; + } + + notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id); + + if (notify) { + do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, + notify->from_peer); + pcmk__intkey_table_remove(local_notify_queue, bcast_id); + } +} + +static void +queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, + gboolean from_peer) +{ + cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); + + notify->notify_src = notify_src; + notify->client_id = strdup(client_id); + notify->sync_reply = sync_reply; + notify->from_peer = from_peer; + + if (!local_notify_queue) { + local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback); + } + pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify); + // cppcheck doesn't know notify will get freed when hash table is destroyed + // cppcheck-suppress memleak +} + +static void +parse_local_options_v1(const pcmk__client_t *cib_client, int call_type, + int call_options, const char *host, const char *op, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process, gboolean *needs_forward) +{ + if (cib_op_modifies(call_type) + && !(call_options & cib_inhibit_bcast)) { + /* we need to send an update anyway */ + *needs_reply = TRUE; + } else { + *needs_reply = FALSE; + } + + if (host == NULL && (call_options & cib_scope_local)) { + crm_trace("Processing locally scoped %s op from client %s", + op, pcmk__client_name(cib_client)); + *local_notify = TRUE; + + } else if ((host == NULL) && based_is_primary) { + crm_trace("Processing %s op locally from client %s as primary", + op, pcmk__client_name(cib_client)); + *local_notify = TRUE; + + } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { + crm_trace("Processing locally addressed %s op from client %s", + op, pcmk__client_name(cib_client)); + *local_notify = TRUE; + + } else if (stand_alone) { + *needs_forward = FALSE; + *local_notify = TRUE; + *process = TRUE; + + } else { + crm_trace("%s op from %s needs to be forwarded to client %s", + op, pcmk__client_name(cib_client), + pcmk__s(host, "the primary instance")); + *needs_forward = TRUE; + *process = FALSE; + } +} + +static void +parse_local_options_v2(const pcmk__client_t *cib_client, int call_type, + int call_options, const char *host, const char *op, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process, gboolean *needs_forward) +{ + if (cib_op_modifies(call_type)) { + if (pcmk__str_any_of(op, PCMK__CIB_REQUEST_PRIMARY, + PCMK__CIB_REQUEST_SECONDARY, NULL)) { + /* Always handle these locally */ + *process = TRUE; + *needs_reply = FALSE; + *local_notify = TRUE; + *needs_forward = 
FALSE; + return; + + } else { + /* Redirect all other updates via CPG */ + *needs_reply = TRUE; + *needs_forward = TRUE; + *process = FALSE; + crm_trace("%s op from %s needs to be forwarded to client %s", + op, pcmk__client_name(cib_client), + pcmk__s(host, "the primary instance")); + return; + } + } + + + *process = TRUE; + *needs_reply = FALSE; + *local_notify = TRUE; + *needs_forward = FALSE; + + if (stand_alone) { + crm_trace("Processing %s op from client %s (stand-alone)", + op, pcmk__client_name(cib_client)); + + } else if (host == NULL) { + crm_trace("Processing unaddressed %s op from client %s", + op, pcmk__client_name(cib_client)); + + } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { + crm_trace("Processing locally addressed %s op from client %s", + op, pcmk__client_name(cib_client)); + + } else { + crm_trace("%s op from %s needs to be forwarded to client %s", + op, pcmk__client_name(cib_client), host); + *needs_forward = TRUE; + *process = FALSE; + } +} + +static void +parse_local_options(const pcmk__client_t *cib_client, int call_type, + int call_options, const char *host, const char *op, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process, gboolean *needs_forward) +{ + if(cib_legacy_mode()) { + parse_local_options_v1(cib_client, call_type, call_options, host, + op, local_notify, needs_reply, process, needs_forward); + } else { + parse_local_options_v2(cib_client, call_type, call_options, host, + op, local_notify, needs_reply, process, needs_forward); + } +} + +static gboolean +parse_peer_options_v1(int call_type, xmlNode * request, + gboolean * local_notify, gboolean * needs_reply, gboolean * process, + gboolean * needs_forward) +{ + const char *op = NULL; + const char *host = NULL; + const char *delegated = NULL; + const char *originator = crm_element_value(request, F_ORIG); + const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); + + gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); + + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + *needs_reply = FALSE; + if (is_reply) { + *local_notify = TRUE; + crm_trace("Processing global/peer update from %s" + " that originated from us", originator); + } else { + crm_trace("Processing global/peer update from %s", originator); + } + return TRUE; + } + + op = crm_element_value(request, F_CIB_OPERATION); + crm_trace("Processing %s request sent by %s", op, originator); + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { + /* Always process these */ + *local_notify = FALSE; + if (reply_to == NULL || is_reply) { + *process = TRUE; + } + if (is_reply) { + *needs_reply = FALSE; + } + return *process; + } + + if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { + process_ping_reply(request); + return FALSE; + } + + if (is_reply) { + crm_trace("Forward reply sent from %s to local clients", originator); + *process = FALSE; + *needs_reply = FALSE; + *local_notify = TRUE; + return TRUE; + } + + host = crm_element_value(request, F_CIB_HOST); + if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { + crm_trace("Processing %s request sent to us from %s", op, originator); + return TRUE; + + } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { + crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); + *needs_reply = TRUE; + return TRUE; + + } else if ((host == NULL) && based_is_primary) { + crm_trace("Processing %s request sent to primary instance from %s", + op, originator); + 
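/* (annotation) host is NULL and this node is the primary instance, so the
 * unaddressed request is accepted for local processing here; the branches
 * below are reached only when the message is addressed elsewhere or this
 * node is not the primary. */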
return TRUE; + } + + delegated = crm_element_value(request, F_CIB_DELEGATED); + if (delegated != NULL) { + crm_trace("Ignoring message for primary instance"); + + } else if (host != NULL) { + /* this is for a specific instance and we're not it */ + crm_trace("Ignoring msg for instance on %s", host); + + } else if ((reply_to == NULL) && !based_is_primary) { + // This is for the primary instance, and we're not it + crm_trace("Ignoring reply for primary instance"); + + } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { + if (reply_to != NULL) { + crm_debug("Processing %s from %s", op, originator); + *needs_reply = FALSE; + + } else { + crm_debug("Processing %s reply from %s", op, originator); + } + return TRUE; + + } else { + crm_err("Nothing for us to do?"); + crm_log_xml_err(request, "Peer[inbound]"); + } + + return FALSE; +} + +static gboolean +parse_peer_options_v2(int call_type, xmlNode * request, + gboolean * local_notify, gboolean * needs_reply, gboolean * process, + gboolean * needs_forward) +{ + const char *host = NULL; + const char *delegated = crm_element_value(request, F_CIB_DELEGATED); + const char *op = crm_element_value(request, F_CIB_OPERATION); + const char *originator = crm_element_value(request, F_ORIG); + const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); + + gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); + + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) { + /* sync_our_cib() sets F_CIB_ISREPLY */ + if (reply_to) { + delegated = reply_to; + } + goto skip_is_reply; + + } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL, + pcmk__str_none)) { + // Nothing to do + + } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { + process_ping_reply(request); + return FALSE; + + } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) { + /* Only the DC (node with the oldest software) should process + * this operation if F_CIB_SCHEMA_MAX is unset + * + * If the DC is happy it will then send out another + * PCMK__CIB_REQUEST_UPGRADE which will tell all nodes to do the actual + * upgrade. + * + * Except this time F_CIB_SCHEMA_MAX will be set which puts a + * limit on how far newer nodes will go + */ + const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); + const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); + + crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", + op, (is_reply? " reply" : ""), + (based_is_primary? "primary" : "secondary"), + (max? max : "none"), (upgrade_rc? 
upgrade_rc : "none")); + + if (upgrade_rc != NULL) { + // Our upgrade request was rejected by DC, notify clients of result + crm_xml_add(request, F_CIB_RC, upgrade_rc); + + } else if ((max == NULL) && based_is_primary) { + /* We are the DC, check if this upgrade is allowed */ + goto skip_is_reply; + + } else if(max) { + /* Ok, go ahead and upgrade to 'max' */ + goto skip_is_reply; + + } else { + // Ignore broadcast client requests when we're not DC + return FALSE; + } + + } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + crm_info("Detected legacy %s global update from %s", op, originator); + send_sync_request(NULL); + legacy_mode = TRUE; + return FALSE; + + } else if (is_reply && cib_op_modifies(call_type)) { + crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); + return FALSE; + + } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { + /* Legacy handling */ + crm_debug("Legacy handling of %s message from %s", op, originator); + *local_notify = FALSE; + if (reply_to == NULL) { + *process = TRUE; + } + return *process; + } + + if(is_reply) { + crm_trace("Handling %s reply sent from %s to local clients", op, originator); + *process = FALSE; + *needs_reply = FALSE; + *local_notify = TRUE; + return TRUE; + } + + skip_is_reply: + *process = TRUE; + *needs_reply = FALSE; + + *local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei); + + host = crm_element_value(request, F_CIB_HOST); + if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { + crm_trace("Processing %s request sent to us from %s", op, originator); + *needs_reply = TRUE; + return TRUE; + + } else if (host != NULL) { + /* this is for a specific instance and we're not it */ + crm_trace("Ignoring %s operation for instance on %s", op, host); + return FALSE; + + } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { + *needs_reply = TRUE; + } + + crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, + crm_element_value(request, F_CIB_CLIENTNAME), + crm_element_value(request, F_CIB_CALLID), + originator, (*local_notify)?"(notify)":""); + return TRUE; +} + +static gboolean +parse_peer_options(int call_type, xmlNode * request, + gboolean * local_notify, gboolean * needs_reply, gboolean * process, + gboolean * needs_forward) +{ + /* TODO: What happens when an update comes in after node A + * requests the CIB from node B, but before it gets the reply (and + * sends out the replace operation) + */ + if(cib_legacy_mode()) { + return parse_peer_options_v1( + call_type, request, local_notify, needs_reply, process, needs_forward); + } else { + return parse_peer_options_v2( + call_type, request, local_notify, needs_reply, process, needs_forward); + } +} + +static void +forward_request(xmlNode *request, int call_options) +{ + const char *op = crm_element_value(request, F_CIB_OPERATION); + const char *host = crm_element_value(request, F_CIB_HOST); + + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + + if (host != NULL) { + crm_trace("Forwarding %s op to %s", op, host); + send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); + + } else { + crm_trace("Forwarding %s op to primary instance", op); + send_cluster_message(NULL, crm_msg_cib, request, FALSE); + } + + /* Return the request to its original state */ + xml_remove_prop(request, F_CIB_DELEGATED); + + if (call_options & cib_discard_reply) { + crm_trace("Client not interested in reply"); + } +} + +static gboolean +send_peer_reply(xmlNode * msg, 
xmlNode * result_diff, const char *originator, gboolean broadcast) +{ + CRM_ASSERT(msg != NULL); + + if (broadcast) { + /* this (successful) call modified the CIB _and_ the + * change needs to be broadcast... + * send via HA to other nodes + */ + int diff_add_updates = 0; + int diff_add_epoch = 0; + int diff_add_admin_epoch = 0; + + int diff_del_updates = 0; + int diff_del_epoch = 0; + int diff_del_admin_epoch = 0; + + const char *digest = NULL; + int format = 1; + + CRM_LOG_ASSERT(result_diff != NULL); + digest = crm_element_value(result_diff, XML_ATTR_DIGEST); + crm_element_value_int(result_diff, "format", &format); + + cib_diff_version_details(result_diff, + &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, + &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); + + crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", + diff_del_admin_epoch, diff_del_epoch, diff_del_updates, + diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); + + crm_xml_add(msg, F_CIB_ISREPLY, originator); + pcmk__xe_set_bool_attr(msg, F_CIB_GLOBAL_UPDATE, true); + crm_xml_add(msg, F_CIB_OPERATION, PCMK__CIB_REQUEST_APPLY_PATCH); + crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER); + + if (format == 1) { + CRM_ASSERT(digest != NULL); + } + + add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); + crm_log_xml_explicit(msg, "copy"); + return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); + + } else if (originator != NULL) { + /* send reply via HA to originating node */ + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); + return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); + } + + return FALSE; +} + +/*! + * \internal + * \brief Handle an IPC or CPG message containing a request + * + * \param[in,out] request Request XML + * \param[in] privileged Whether privileged commands may be run + * (see cib_server_ops[] definition) + * \param[in] cib_client IPC client that sent request (or NULL if CPG) + */ +static void +cib_process_request(xmlNode *request, gboolean privileged, + const pcmk__client_t *cib_client) +{ + int call_type = 0; + int call_options = 0; + + gboolean process = TRUE; // Whether to process request locally now + gboolean is_update = TRUE; // Whether request would modify CIB + gboolean needs_reply = TRUE; // Whether to build a reply + gboolean local_notify = FALSE; // Whether to notify (local) requester + gboolean needs_forward = FALSE; // Whether to forward request somewhere else + + xmlNode *op_reply = NULL; + xmlNode *result_diff = NULL; + + int rc = pcmk_ok; + const char *op = crm_element_value(request, F_CIB_OPERATION); + const char *originator = crm_element_value(request, F_ORIG); + const char *host = crm_element_value(request, F_CIB_HOST); + const char *target = NULL; + const char *call_id = crm_element_value(request, F_CIB_CALLID); + const char *client_id = crm_element_value(request, F_CIB_CLIENTID); + const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); + const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); + + crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); + + if ((host != NULL) && (*host == '\0')) { + host = NULL; + } + + if (host) { + target = host; + + } else if (call_options & cib_scope_local) { + target = "local host"; + + } else { + target = "primary"; + } + + if (cib_client == NULL) { + crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", + op, client_name, call_id, originator, target, 
reply_to); + } else { + crm_xml_add(request, F_ORIG, OUR_NODENAME); + crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); + } + + rc = cib_get_operation_id(op, &call_type); + if (rc != pcmk_ok) { + /* TODO: construct error reply? */ + crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); + return; + } + + if (cib_client != NULL) { + parse_local_options(cib_client, call_type, call_options, host, op, + &local_notify, &needs_reply, &process, &needs_forward); + + } else if (parse_peer_options(call_type, request, &local_notify, + &needs_reply, &process, &needs_forward) == FALSE) { + return; + } + + is_update = cib_op_modifies(call_type); + + if (call_options & cib_discard_reply) { + /* If the request will modify the CIB, and we are in legacy mode, we + * need to build a reply so we can broadcast a diff, even if the + * requester doesn't want one. + */ + needs_reply = is_update && cib_legacy_mode(); + local_notify = FALSE; + } + + if (needs_forward) { + const char *section = crm_element_value(request, F_CIB_SECTION); + int log_level = LOG_INFO; + + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { + log_level = LOG_DEBUG; + } + + do_crm_log(log_level, + "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", + op, + section ? section : "'all'", + pcmk__s(host, (cib_legacy_mode() ? "primary" : "all")), + originator ? originator : "local", + client_name, call_id); + + forward_request(request, call_options); + return; + } + + if (cib_status != pcmk_ok) { + const char *call = crm_element_value(request, F_CIB_CALLID); + + rc = cib_status; + crm_err("Operation ignored, cluster configuration is invalid." + " Please repair and restart: %s", pcmk_strerror(cib_status)); + + op_reply = create_xml_node(NULL, "cib-reply"); + crm_xml_add(op_reply, F_TYPE, T_CIB); + crm_xml_add(op_reply, F_CIB_OPERATION, op); + crm_xml_add(op_reply, F_CIB_CALLID, call); + crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); + crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); + crm_xml_add_int(op_reply, F_CIB_RC, rc); + + crm_trace("Attaching reply output"); + add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); + + crm_log_xml_explicit(op_reply, "cib:reply"); + + } else if (process) { + time_t finished = 0; + time_t now = time(NULL); + int level = LOG_INFO; + const char *section = crm_element_value(request, F_CIB_SECTION); + + rc = cib_process_command(request, &op_reply, &result_diff, privileged); + + if (!is_update) { + level = LOG_TRACE; + + } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + switch (rc) { + case pcmk_ok: + level = LOG_INFO; + break; + case -pcmk_err_old_data: + case -pcmk_err_diff_resync: + case -pcmk_err_diff_failed: + level = LOG_TRACE; + break; + default: + level = LOG_ERR; + } + + } else if (rc != pcmk_ok) { + level = LOG_WARNING; + } + + do_crm_log(level, + "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", + op, section ? section : "'all'", pcmk_strerror(rc), rc, + originator ? originator : "local", client_name, call_id, + the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", + the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", + the_cib ? 
crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); + + finished = time(NULL); + if ((finished - now) > 3) { + crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); + crm_write_blackbox(0, NULL); + } + + if (op_reply == NULL && (needs_reply || local_notify)) { + crm_err("Unexpected NULL reply to message"); + crm_log_xml_err(request, "null reply"); + needs_reply = FALSE; + local_notify = FALSE; + } + } + + if (is_update && !cib_legacy_mode()) { + crm_trace("Completed pre-sync update from %s/%s/%s%s", + originator ? originator : "local", client_name, call_id, + local_notify?" with local notification":""); + + } else if (!needs_reply || stand_alone) { + // This was a non-originating secondary update + crm_trace("Completed update as secondary"); + + } else if (cib_legacy_mode() && + rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { + gboolean broadcast = FALSE; + + cib_local_bcast_num++; + crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); + broadcast = send_peer_reply(request, result_diff, originator, TRUE); + + if (broadcast && client_id && local_notify && op_reply) { + + /* If we have been asked to sync the reply, + * and a bcast msg has gone out, we queue the local notify + * until we know the bcast message has been received */ + local_notify = FALSE; + crm_trace("Queuing local %ssync notification for %s", + (call_options & cib_sync_call) ? "" : "a-", client_id); + + queue_local_notify(op_reply, client_id, + pcmk_is_set(call_options, cib_sync_call), + (cib_client == NULL)); + op_reply = NULL; /* the reply is queued, so don't free here */ + } + + } else if (call_options & cib_discard_reply) { + crm_trace("Caller isn't interested in reply"); + + } else if (cib_client == NULL) { + if (is_update == FALSE || result_diff == NULL) { + crm_trace("Request not broadcast: R/O call"); + + } else if (call_options & cib_inhibit_bcast) { + crm_trace("Request not broadcast: inhibited"); + + } else if (rc != pcmk_ok) { + crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); + + } else { + crm_trace("Directing reply to %s", originator); + } + + send_peer_reply(op_reply, result_diff, originator, FALSE); + } + + if (local_notify && client_id) { + crm_trace("Performing local %ssync notification for %s", + (pcmk_is_set(call_options, cib_sync_call)? 
"" : "a"), + client_id); + if (process == FALSE) { + do_local_notify(request, client_id, + pcmk_is_set(call_options, cib_sync_call), + (cib_client == NULL)); + } else { + do_local_notify(op_reply, client_id, + pcmk_is_set(call_options, cib_sync_call), + (cib_client == NULL)); + } + } + + free_xml(op_reply); + free_xml(result_diff); + + return; +} + +static char * +calculate_section_digest(const char *xpath, xmlNode * xml_obj) +{ + xmlNode *xml_section = NULL; + + if (xml_obj == NULL) { + return NULL; + } + + xml_section = get_xpath_object(xpath, xml_obj, LOG_TRACE); + if (xml_section == NULL) { + return NULL; + } + return calculate_xml_versioned_digest(xml_section, FALSE, TRUE, CRM_FEATURE_SET); + +} + +// v1 and v2 patch formats +#define XPATH_CONFIG_CHANGE \ + "//" XML_CIB_TAG_CRMCONFIG " | " \ + "//" XML_DIFF_CHANGE \ + "[contains(@" XML_DIFF_PATH ",'/" XML_CIB_TAG_CRMCONFIG "/')]" + +static bool +contains_config_change(xmlNode *diff) +{ + bool changed = false; + + if (diff) { + xmlXPathObject *xpathObj = xpath_search(diff, XPATH_CONFIG_CHANGE); + + if (numXpathResults(xpathObj) > 0) { + changed = true; + } + freeXpathObject(xpathObj); + } + return changed; +} + +static int +cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) +{ + xmlNode *input = NULL; + xmlNode *output = NULL; + xmlNode *result_cib = NULL; + xmlNode *current_cib = NULL; + + int call_type = 0; + int call_options = 0; + + const char *op = NULL; + const char *section = NULL; + const char *call_id = crm_element_value(request, F_CIB_CALLID); + const char *client_id = crm_element_value(request, F_CIB_CLIENTID); + const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); + const char *origin = crm_element_value(request, F_ORIG); + + int rc = pcmk_ok; + int rc2 = pcmk_ok; + + gboolean send_r_notify = FALSE; + gboolean config_changed = FALSE; + gboolean manage_counters = TRUE; + + static mainloop_timer_t *digest_timer = NULL; + + char *current_nodes_digest = NULL; + char *current_alerts_digest = NULL; + char *current_status_digest = NULL; + uint32_t change_section = cib_change_section_nodes + |cib_change_section_alerts + |cib_change_section_status; + + CRM_ASSERT(cib_status == pcmk_ok); + + if(digest_timer == NULL) { + digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); + } + + *reply = NULL; + *cib_diff = NULL; + current_cib = the_cib; + + /* Start processing the request... 
*/ + op = crm_element_value(request, F_CIB_OPERATION); + crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); + rc = cib_get_operation_id(op, &call_type); + + if (rc == pcmk_ok && privileged == FALSE) { + rc = cib_op_can_run(call_type, call_options, privileged); + } + + rc2 = cib_op_prepare(call_type, request, &input, §ion); + if (rc == pcmk_ok) { + rc = rc2; + } + + if (rc != pcmk_ok) { + crm_trace("Call setup failed: %s", pcmk_strerror(rc)); + goto done; + + } else if (cib_op_modifies(call_type) == FALSE) { + rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, + section, request, input, FALSE, &config_changed, + current_cib, &result_cib, NULL, &output); + + CRM_CHECK(result_cib == NULL, free_xml(result_cib)); + goto done; + } + + /* Handle a valid write action */ + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + /* legacy code */ + manage_counters = FALSE; + cib__set_call_options(call_options, "call", cib_force_diff); + crm_trace("Global update detected"); + + CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); + crm_log_xml_err(request, "bad op")); + } + + ping_modified_since = TRUE; + if (pcmk_is_set(call_options, cib_inhibit_bcast)) { + crm_trace("Skipping update: inhibit broadcast"); + manage_counters = FALSE; + } + + if (!pcmk_is_set(call_options, cib_dryrun) + && pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) { + // Copying large CIBs accounts for a huge percentage of our CIB usage + cib__set_call_options(call_options, "call", cib_zero_copy); + } else { + cib__clear_call_options(call_options, "call", cib_zero_copy); + } + +#define XPATH_CONFIG "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION +#define XPATH_NODES XPATH_CONFIG "/" XML_CIB_TAG_NODES +#define XPATH_ALERTS XPATH_CONFIG "/" XML_CIB_TAG_ALERTS +#define XPATH_STATUS "//" XML_TAG_CIB "/" XML_CIB_TAG_STATUS + + // Calculate the hash value of the section before the change + if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) { + current_nodes_digest = calculate_section_digest(XPATH_NODES, + current_cib); + current_alerts_digest = calculate_section_digest(XPATH_ALERTS, + current_cib); + current_status_digest = calculate_section_digest(XPATH_STATUS, + current_cib); + crm_trace("current-digest %s:%s:%s", current_nodes_digest, + current_alerts_digest, current_status_digest); + } + + // result_cib must not be modified after cib_perform_op() returns + rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, + section, request, input, manage_counters, + &config_changed, current_cib, &result_cib, cib_diff, + &output); + + if (!manage_counters) { + int format = 1; + + /* Legacy code + * If the diff is NULL at this point, it's because nothing changed + */ + if (*cib_diff != NULL) { + crm_element_value_int(*cib_diff, "format", &format); + } + + if (format == 1) { + config_changed = cib__config_changed_v1(NULL, NULL, cib_diff); + } + } + + /* Always write to disk for successful replace and upgrade ops. This also + * negates the need to detect ordering changes. + */ + if ((rc == pcmk_ok) + && pcmk__str_any_of(op, + PCMK__CIB_REQUEST_REPLACE, + PCMK__CIB_REQUEST_UPGRADE, + NULL)) { + config_changed = TRUE; + } + + if (rc == pcmk_ok && !pcmk_is_set(call_options, cib_dryrun)) { + crm_trace("Activating %s->%s%s%s", + crm_element_value(current_cib, XML_ATTR_NUMUPDATES), + crm_element_value(result_cib, XML_ATTR_NUMUPDATES), + (pcmk_is_set(call_options, cib_zero_copy)? " zero-copy" : ""), + (config_changed? 
" changed" : "")); + if (!pcmk_is_set(call_options, cib_zero_copy)) { + rc = activateCibXml(result_cib, config_changed, op); + crm_trace("Activated %s (%d)", + crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc); + } + + if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { + cib_read_config(config_hash, result_cib); + } + + if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) { + char *result_nodes_digest = NULL; + char *result_alerts_digest = NULL; + char *result_status_digest = NULL; + + /* Calculate the hash value of the changed section. */ + result_nodes_digest = calculate_section_digest(XPATH_NODES, + result_cib); + result_alerts_digest = calculate_section_digest(XPATH_ALERTS, + result_cib); + result_status_digest = calculate_section_digest(XPATH_STATUS, + result_cib); + crm_trace("result-digest %s:%s:%s", result_nodes_digest, + result_alerts_digest, result_status_digest); + + if (pcmk__str_eq(current_nodes_digest, result_nodes_digest, + pcmk__str_none)) { + change_section = + pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, + "CIB change section", + "change_section", change_section, + cib_change_section_nodes, "nodes"); + } + + if (pcmk__str_eq(current_alerts_digest, result_alerts_digest, + pcmk__str_none)) { + change_section = + pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, + "CIB change section", + "change_section", change_section, + cib_change_section_alerts, "alerts"); + } + + if (pcmk__str_eq(current_status_digest, result_status_digest, + pcmk__str_none)) { + change_section = + pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, + "CIB change section", + "change_section", change_section, + cib_change_section_status, "status"); + } + + if (change_section != cib_change_section_none) { + send_r_notify = TRUE; + } + + free(result_nodes_digest); + free(result_alerts_digest); + free(result_status_digest); + + } else if (pcmk__str_eq(PCMK__CIB_REQUEST_ERASE, op, pcmk__str_none)) { + send_r_notify = TRUE; + } + + mainloop_timer_stop(digest_timer); + mainloop_timer_start(digest_timer); + + } else if (rc == -pcmk_err_schema_validation) { + CRM_ASSERT(!pcmk_is_set(call_options, cib_zero_copy)); + + if (output != NULL) { + crm_log_xml_info(output, "cib:output"); + free_xml(output); + } + + output = result_cib; + + } else { + crm_trace("Not activating %d %d %s", rc, + pcmk_is_set(call_options, cib_dryrun), + crm_element_value(result_cib, XML_ATTR_NUMUPDATES)); + if (!pcmk_is_set(call_options, cib_zero_copy)) { + free_xml(result_cib); + } + } + + if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) { + crm_trace("Sending notifications %d", + pcmk_is_set(call_options, cib_dryrun)); + cib_diff_notify(op, rc, call_id, client_id, client_name, origin, input, + *cib_diff); + } + + if (send_r_notify) { + cib_replace_notify(op, rc, call_id, client_id, client_name, origin, + the_cib, *cib_diff, change_section); + } + + pcmk__output_set_log_level(logger_out, LOG_TRACE); + logger_out->message(logger_out, "xml-patchset", *cib_diff); + + done: + if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) { + const char *caller = crm_element_value(request, F_CIB_CLIENTID); + + *reply = create_xml_node(NULL, "cib-reply"); + crm_xml_add(*reply, F_TYPE, T_CIB); + crm_xml_add(*reply, F_CIB_OPERATION, op); + crm_xml_add(*reply, F_CIB_CALLID, call_id); + crm_xml_add(*reply, F_CIB_CLIENTID, caller); + crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); + crm_xml_add_int(*reply, F_CIB_RC, rc); + + if (output != NULL) { + crm_trace("Attaching reply output"); + 
add_message_xml(*reply, F_CIB_CALLDATA, output); + } + + crm_log_xml_explicit(*reply, "cib:reply"); + } + + crm_trace("cleanup"); + + if (cib_op_modifies(call_type) == FALSE && output != current_cib) { + free_xml(output); + output = NULL; + } + + if (call_type >= 0) { + cib_op_cleanup(call_type, call_options, &input, &output); + } + + free(current_nodes_digest); + free(current_alerts_digest); + free(current_status_digest); + + crm_trace("done"); + return rc; +} + +void +cib_peer_callback(xmlNode * msg, void *private_data) +{ + const char *reason = NULL; + const char *originator = crm_element_value(msg, F_ORIG); + + if (cib_legacy_mode() + && pcmk__str_eq(originator, OUR_NODENAME, + pcmk__str_casei|pcmk__str_null_matches)) { + /* message is from ourselves */ + int bcast_id = 0; + + if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { + check_local_notify(bcast_id); + } + return; + + } else if (crm_peer_cache == NULL) { + reason = "membership not established"; + goto bail; + } + + if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { + crm_xml_add(msg, F_CIB_CLIENTNAME, originator); + } + + /* crm_log_xml_trace(msg, "Peer[inbound]"); */ + cib_process_request(msg, TRUE, NULL); + return; + + bail: + if (reason) { + const char *seq = crm_element_value(msg, F_SEQ); + const char *op = crm_element_value(msg, F_CIB_OPERATION); + + crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); + } +} + +static gboolean +cib_force_exit(gpointer data) +{ + crm_notice("Forcing exit!"); + terminate_cib(__func__, CRM_EX_ERROR); + return FALSE; +} + +static void +disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) +{ + pcmk__client_t *a_client = value; + + crm_err("Can't disconnect client %s: Not implemented", + pcmk__client_name(a_client)); +} + +static void +initiate_exit(void) +{ + int active = 0; + xmlNode *leaving = NULL; + + active = crm_active_peers(); + if (active < 2) { + terminate_cib(__func__, 0); + return; + } + + crm_info("Sending disconnect notification to %d peers...", active); + + leaving = create_xml_node(NULL, "exit-notification"); + crm_xml_add(leaving, F_TYPE, "cib"); + crm_xml_add(leaving, F_CIB_OPERATION, PCMK__CIB_REQUEST_SHUTDOWN); + + send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); + free_xml(leaving); + + g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL); +} + +void +cib_shutdown(int nsig) +{ + struct qb_ipcs_stats srv_stats; + + if (cib_shutdown_flag == FALSE) { + int disconnects = 0; + qb_ipcs_connection_t *c = NULL; + + cib_shutdown_flag = TRUE; + + c = qb_ipcs_connection_first_get(ipcs_rw); + while (c != NULL) { + qb_ipcs_connection_t *last = c; + + c = qb_ipcs_connection_next_get(ipcs_rw, last); + + crm_debug("Disconnecting r/w client %p...", last); + qb_ipcs_disconnect(last); + qb_ipcs_connection_unref(last); + disconnects++; + } + + c = qb_ipcs_connection_first_get(ipcs_ro); + while (c != NULL) { + qb_ipcs_connection_t *last = c; + + c = qb_ipcs_connection_next_get(ipcs_ro, last); + + crm_debug("Disconnecting r/o client %p...", last); + qb_ipcs_disconnect(last); + qb_ipcs_connection_unref(last); + disconnects++; + } + + c = qb_ipcs_connection_first_get(ipcs_shm); + while (c != NULL) { + qb_ipcs_connection_t *last = c; + + c = qb_ipcs_connection_next_get(ipcs_shm, last); + + crm_debug("Disconnecting non-blocking r/w client %p...", last); + qb_ipcs_disconnect(last); + qb_ipcs_connection_unref(last); + disconnects++; + } + + disconnects += pcmk__ipc_client_count(); + + crm_debug("Disconnecting %d 
remote clients", pcmk__ipc_client_count()); + pcmk__foreach_ipc_client(disconnect_remote_client, NULL); + crm_info("Disconnected %d clients", disconnects); + } + + qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); + + if (pcmk__ipc_client_count() == 0) { + crm_info("All clients disconnected (%d)", srv_stats.active_connections); + initiate_exit(); + + } else { + crm_info("Waiting on %d clients to disconnect (%d)", + pcmk__ipc_client_count(), srv_stats.active_connections); + } +} + +extern int remote_fd; +extern int remote_tls_fd; + +/*! + * \internal + * \brief Close remote sockets, free the global CIB and quit + * + * \param[in] caller Name of calling function (for log message) + * \param[in] fast If -1, skip disconnect; if positive, exit that + */ +void +terminate_cib(const char *caller, int fast) +{ + crm_info("%s: Exiting%s...", caller, + (fast > 0)? " fast" : mainloop ? " from mainloop" : ""); + + if (remote_fd > 0) { + close(remote_fd); + remote_fd = 0; + } + if (remote_tls_fd > 0) { + close(remote_tls_fd); + remote_tls_fd = 0; + } + + uninitializeCib(); + + if (logger_out != NULL) { + logger_out->finish(logger_out, CRM_EX_OK, true, NULL); + pcmk__output_free(logger_out); + logger_out = NULL; + } + + if (fast > 0) { + /* Quit fast on error */ + pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); + crm_exit(fast); + + } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) { + /* Quit via returning from the main loop. If fast == -1, we skip the + * disconnect here, and it will be done when the main loop returns + * (this allows the peer status callback to avoid messing with the + * peer caches). + */ + if (fast == 0) { + crm_cluster_disconnect(crm_cluster); + } + g_main_loop_quit(mainloop); + + } else { + /* Quit via clean exit. Even the peer status callback can disconnect + * here, because we're not returning control to the caller. */ + crm_cluster_disconnect(crm_cluster); + pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); + crm_exit(CRM_EX_OK); + } +} diff --git a/daemons/based/based_common.c b/daemons/based/based_common.c new file mode 100644 index 0000000..7e68cf0 --- /dev/null +++ b/daemons/based/based_common.c @@ -0,0 +1,352 @@ +/* + * Copyright 2008-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +gboolean stand_alone = FALSE; + +extern int cib_perform_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, + gboolean privileged); + +static xmlNode * +cib_prepare_common(xmlNode * root, const char *section) +{ + xmlNode *data = NULL; + + /* extract the CIB from the fragment */ + if (root == NULL) { + return NULL; + + } else if (pcmk__strcase_any_of(crm_element_name(root), XML_TAG_FRAGMENT, + F_CRM_DATA, F_CIB_CALLDATA, NULL)) { + data = first_named_child(root, XML_TAG_CIB); + + } else { + data = root; + } + + /* grab the section specified for the command */ + if (section != NULL && data != NULL && pcmk__str_eq(crm_element_name(data), XML_TAG_CIB, pcmk__str_none)) { + data = pcmk_find_cib_element(data, section); + } + + /* crm_log_xml_trace(root, "cib:input"); */ + return data; +} + +static int +cib_prepare_none(xmlNode * request, xmlNode ** data, const char **section) +{ + *data = NULL; + *section = crm_element_value(request, F_CIB_SECTION); + return pcmk_ok; +} + +static int +cib_prepare_data(xmlNode * request, xmlNode ** data, const char **section) +{ + xmlNode *input_fragment = get_message_xml(request, F_CIB_CALLDATA); + + *section = crm_element_value(request, F_CIB_SECTION); + *data = cib_prepare_common(input_fragment, *section); + /* crm_log_xml_debug(*data, "data"); */ + return pcmk_ok; +} + +static int +cib_prepare_sync(xmlNode * request, xmlNode ** data, const char **section) +{ + *data = NULL; + *section = crm_element_value(request, F_CIB_SECTION); + return pcmk_ok; +} + +static int +cib_prepare_diff(xmlNode * request, xmlNode ** data, const char **section) +{ + xmlNode *input_fragment = NULL; + + *data = NULL; + *section = NULL; + + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + input_fragment = get_message_xml(request, F_CIB_UPDATE_DIFF); + } else { + input_fragment = get_message_xml(request, F_CIB_CALLDATA); + } + + CRM_CHECK(input_fragment != NULL, crm_log_xml_warn(request, "no input")); + *data = cib_prepare_common(input_fragment, NULL); + return pcmk_ok; +} + +static int +cib_cleanup_query(int options, xmlNode ** data, xmlNode ** output) +{ + CRM_LOG_ASSERT(*data == NULL); + if ((options & cib_no_children) + || pcmk__str_eq(crm_element_name(*output), "xpath-query", pcmk__str_casei)) { + free_xml(*output); + } + return pcmk_ok; +} + +static int +cib_cleanup_data(int options, xmlNode ** data, xmlNode ** output) +{ + free_xml(*output); + *data = NULL; + return pcmk_ok; +} + +static int +cib_cleanup_output(int options, xmlNode ** data, xmlNode ** output) +{ + free_xml(*output); + return pcmk_ok; +} + +static int +cib_cleanup_none(int options, xmlNode ** data, xmlNode ** output) +{ + CRM_LOG_ASSERT(*data == NULL); + CRM_LOG_ASSERT(*output == NULL); + return pcmk_ok; +} + +static cib_operation_t cib_server_ops[] = { + // Booleans are modifies_cib, needs_privileges + { + NULL, FALSE, FALSE, + cib_prepare_none, cib_cleanup_none, cib_process_default + }, + { + PCMK__CIB_REQUEST_QUERY, FALSE, FALSE, + cib_prepare_none, cib_cleanup_query, cib_process_query + }, + { + PCMK__CIB_REQUEST_MODIFY, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_modify + }, + { + PCMK__CIB_REQUEST_APPLY_PATCH, TRUE, TRUE, + cib_prepare_diff, cib_cleanup_data, cib_server_process_diff + }, + { + PCMK__CIB_REQUEST_REPLACE, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_replace_svr + }, 
+ { + PCMK__CIB_REQUEST_CREATE, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_create + }, + { + PCMK__CIB_REQUEST_DELETE, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_delete + }, + { + PCMK__CIB_REQUEST_SYNC_TO_ALL, FALSE, TRUE, + cib_prepare_sync, cib_cleanup_none, cib_process_sync + }, + { + PCMK__CIB_REQUEST_BUMP, TRUE, TRUE, + cib_prepare_none, cib_cleanup_output, cib_process_bump + }, + { + PCMK__CIB_REQUEST_ERASE, TRUE, TRUE, + cib_prepare_none, cib_cleanup_output, cib_process_erase + }, + { + PCMK__CIB_REQUEST_NOOP, FALSE, FALSE, + cib_prepare_none, cib_cleanup_none, cib_process_default + }, + { + PCMK__CIB_REQUEST_ABS_DELETE, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_delete_absolute + }, + { + PCMK__CIB_REQUEST_UPGRADE, TRUE, TRUE, + cib_prepare_none, cib_cleanup_output, cib_process_upgrade_server + }, + { + PCMK__CIB_REQUEST_SECONDARY, FALSE, TRUE, + cib_prepare_none, cib_cleanup_none, cib_process_readwrite + }, + { + PCMK__CIB_REQUEST_ALL_SECONDARY, FALSE, TRUE, + cib_prepare_none, cib_cleanup_none, cib_process_readwrite + }, + { + PCMK__CIB_REQUEST_SYNC_TO_ONE, FALSE, TRUE, + cib_prepare_sync, cib_cleanup_none, cib_process_sync_one + }, + { + PCMK__CIB_REQUEST_PRIMARY, TRUE, TRUE, + cib_prepare_data, cib_cleanup_data, cib_process_readwrite + }, + { + PCMK__CIB_REQUEST_IS_PRIMARY, FALSE, TRUE, + cib_prepare_none, cib_cleanup_none, cib_process_readwrite + }, + { + PCMK__CIB_REQUEST_SHUTDOWN, FALSE, TRUE, + cib_prepare_sync, cib_cleanup_none, cib_process_shutdown_req + }, + { + CRM_OP_PING, FALSE, FALSE, + cib_prepare_none, cib_cleanup_output, cib_process_ping + }, +}; + +int +cib_get_operation_id(const char *op, int *operation) +{ + static GHashTable *operation_hash = NULL; + + if (operation_hash == NULL) { + int lpc = 0; + int max_msg_types = PCMK__NELEM(cib_server_ops); + + operation_hash = pcmk__strkey_table(NULL, free); + for (lpc = 1; lpc < max_msg_types; lpc++) { + int *value = malloc(sizeof(int)); + + if(value) { + *value = lpc; + g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); + } + } + } + + if (op != NULL) { + int *value = g_hash_table_lookup(operation_hash, op); + + if (value) { + *operation = *value; + return pcmk_ok; + } + } + crm_err("Operation %s is not valid", op); + *operation = -1; + return -EINVAL; +} + +xmlNode * +cib_msg_copy(xmlNode * msg, gboolean with_data) +{ + int lpc = 0; + const char *field = NULL; + const char *value = NULL; + xmlNode *value_struct = NULL; + + static const char *field_list[] = { + F_XML_TAGNAME, + F_TYPE, + F_CIB_CLIENTID, + F_CIB_CALLOPTS, + F_CIB_CALLID, + F_CIB_OPERATION, + F_CIB_ISREPLY, + F_CIB_SECTION, + F_CIB_HOST, + F_CIB_RC, + F_CIB_DELEGATED, + F_CIB_OBJID, + F_CIB_OBJTYPE, + F_CIB_EXISTING, + F_CIB_SEENCOUNT, + F_CIB_TIMEOUT, + F_CIB_GLOBAL_UPDATE, + F_CIB_CLIENTNAME, + F_CIB_USER, + F_CIB_NOTIFY_TYPE, + F_CIB_NOTIFY_ACTIVATE + }; + + static const char *data_list[] = { + F_CIB_CALLDATA, + F_CIB_UPDATE, + F_CIB_UPDATE_RESULT + }; + + xmlNode *copy = create_xml_node(NULL, "copy"); + + CRM_ASSERT(copy != NULL); + + for (lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) { + field = field_list[lpc]; + value = crm_element_value(msg, field); + if (value != NULL) { + crm_xml_add(copy, field, value); + } + } + for (lpc = 0; with_data && lpc < PCMK__NELEM(data_list); lpc++) { + field = data_list[lpc]; + value_struct = get_message_xml(msg, field); + if (value_struct != NULL) { + add_message_xml(copy, field, value_struct); + } + } + + return copy; +} 
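+
+/* (Annotation, not upstream code) A minimal sketch, under assumed names, of
+ * how a request handler is expected to drive the cib_server_ops[] table via
+ * the accessors defined below; "request", "call_options", "privileged",
+ * "result_cib" and "rc" are hypothetical locals, and error handling is
+ * trimmed:
+ *
+ *     int call_type = 0;
+ *     xmlNode *input = NULL;
+ *     xmlNode *answer = NULL;
+ *     const char *section = NULL;
+ *     const char *op = crm_element_value(request, F_CIB_OPERATION);
+ *
+ *     if ((cib_get_operation_id(op, &call_type) == pcmk_ok)
+ *         && (cib_op_can_run(call_type, call_options, privileged) == pcmk_ok)) {
+ *         cib_op_t *fn = cib_op_func(call_type);
+ *
+ *         cib_op_prepare(call_type, request, &input, &section);
+ *         rc = (*fn)(op, call_options, section, request, input,
+ *                    the_cib, &result_cib, &answer);
+ *         cib_op_cleanup(call_type, call_options, &input, &answer);
+ *     }
+ */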
+ +cib_op_t * +cib_op_func(int call_type) +{ + return &(cib_server_ops[call_type].fn); +} + +gboolean +cib_op_modifies(int call_type) +{ + return cib_server_ops[call_type].modifies_cib; +} + +int +cib_op_can_run(int call_type, int call_options, bool privileged) +{ + if (!privileged && cib_server_ops[call_type].needs_privileges) { + return -EACCES; + } + return pcmk_ok; +} + +int +cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section) +{ + crm_trace("Prepare %d", call_type); + return cib_server_ops[call_type].prepare(request, input, section); +} + +int +cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output) +{ + crm_trace("Cleanup %d", call_type); + return cib_server_ops[call_type].cleanup(options, input, output); +} diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c new file mode 100644 index 0000000..fc34f39 --- /dev/null +++ b/daemons/based/based_io.c @@ -0,0 +1,473 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +crm_trigger_t *cib_writer = NULL; + +int write_cib_contents(gpointer p); + +static void +cib_rename(const char *old) +{ + int new_fd; + char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root); + + umask(S_IWGRP | S_IWOTH | S_IROTH); + new_fd = mkstemp(new); + crm_err("Archiving unusable file %s as %s", old, new); + if ((new_fd < 0) || (rename(old, new) < 0)) { + crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new); + crm_err("Disabling disk writes and continuing"); + cib_writes_enabled = FALSE; + } + if (new_fd > 0) { + close(new_fd); + } + free(new); +} + +/* + * It is the caller's responsibility to free the output of this function + */ + +static xmlNode * +retrieveCib(const char *filename, const char *sigfile) +{ + xmlNode *root = NULL; + + crm_info("Reading cluster configuration file %s (digest: %s)", + filename, sigfile); + switch (cib_file_read_and_verify(filename, sigfile, &root)) { + case -pcmk_err_cib_corrupt: + crm_warn("Continuing but %s will NOT be used.", filename); + break; + + case -pcmk_err_cib_modified: + /* Archive the original files so the contents are not lost */ + crm_warn("Continuing but %s will NOT be used.", filename); + cib_rename(filename); + cib_rename(sigfile); + break; + } + return root; +} + +/* + * for OSs without support for direntry->d_type, like Solaris + */ +#ifndef DT_UNKNOWN +# define DT_UNKNOWN 0 +# define DT_FIFO 1 +# define DT_CHR 2 +# define DT_DIR 4 +# define DT_BLK 6 +# define DT_REG 8 +# define DT_LNK 10 +# define DT_SOCK 12 +# define DT_WHT 14 +#endif /*DT_UNKNOWN*/ + +static int cib_archive_filter(const struct dirent * a) +{ + int rc = 0; + /* Looking for regular files (d_type = 8) starting with 'cib-' and not ending in .sig */ + struct stat s; + char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name); + + if(stat(a_path, &s) != 0) { + rc = errno; + crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_strerror(rc), rc); + rc = 0; + + } else if ((s.st_mode & S_IFREG) != S_IFREG) { + unsigned char dtype; +#ifdef HAVE_STRUCT_DIRENT_D_TYPE + dtype = a->d_type; +#else + switch (s.st_mode & S_IFMT) { + case S_IFREG:
dtype = DT_REG; break; + case S_IFDIR: dtype = DT_DIR; break; + case S_IFCHR: dtype = DT_CHR; break; + case S_IFBLK: dtype = DT_BLK; break; + case S_IFLNK: dtype = DT_LNK; break; + case S_IFIFO: dtype = DT_FIFO; break; + case S_IFSOCK: dtype = DT_SOCK; break; + default: dtype = DT_UNKNOWN; break; + } +#endif + crm_trace("%s - wrong type (%d)", a->d_name, dtype); + + } else if(strstr(a->d_name, "cib-") != a->d_name) { + crm_trace("%s - wrong prefix", a->d_name); + + } else if (pcmk__ends_with_ext(a->d_name, ".sig")) { + crm_trace("%s - wrong suffix", a->d_name); + + } else { + crm_debug("%s - candidate", a->d_name); + rc = 1; + } + + free(a_path); + return rc; +} + +static int cib_archive_sort(const struct dirent ** a, const struct dirent **b) +{ + /* Order by creation date - most recently created file first */ + int rc = 0; + struct stat buf; + + time_t a_age = 0; + time_t b_age = 0; + + char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name); + char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name); + + if(stat(a_path, &buf) == 0) { + a_age = buf.st_ctime; + } + if(stat(b_path, &buf) == 0) { + b_age = buf.st_ctime; + } + + free(a_path); + free(b_path); + + if(a_age > b_age) { + rc = 1; + } else if(a_age < b_age) { + rc = -1; + } + + crm_trace("%s (%lu) vs. %s (%lu) : %d", + a[0]->d_name, (unsigned long)a_age, + b[0]->d_name, (unsigned long)b_age, rc); + return rc; +} + +xmlNode * +readCibXmlFile(const char *dir, const char *file, gboolean discard_status) +{ + struct dirent **namelist = NULL; + + int lpc = 0; + char *sigfile = NULL; + char *sigfilepath = NULL; + char *filename = NULL; + const char *name = NULL; + const char *value = NULL; + const char *validation = NULL; + const char *use_valgrind = getenv("PCMK_valgrind_enabled"); + + xmlNode *root = NULL; + xmlNode *status = NULL; + + sigfile = crm_strdup_printf("%s.sig", file); + if (pcmk__daemon_can_write(dir, file) == FALSE + || pcmk__daemon_can_write(dir, sigfile) == FALSE) { + cib_status = -EACCES; + return NULL; + } + + filename = crm_strdup_printf("%s/%s", dir, file); + sigfilepath = crm_strdup_printf("%s/%s", dir, sigfile); + free(sigfile); + + cib_status = pcmk_ok; + root = retrieveCib(filename, sigfilepath); + free(filename); + free(sigfilepath); + + if (root == NULL) { + crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root); + lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); + if (lpc < 0) { + crm_perror(LOG_NOTICE, "scandir(%s) failed", cib_root); + } + } + + while (root == NULL && lpc > 1) { + crm_debug("Testing %d candidates", lpc); + + lpc--; + + filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name); + sigfile = crm_strdup_printf("%s.sig", filename); + + crm_info("Reading cluster configuration file %s (digest: %s)", + filename, sigfile); + if (cib_file_read_and_verify(filename, sigfile, &root) < 0) { + crm_warn("Continuing but %s will NOT be used.", filename); + } else { + crm_notice("Continuing with last valid configuration archive: %s", filename); + } + + free(namelist[lpc]); + free(filename); + free(sigfile); + } + free(namelist); + + if (root == NULL) { + root = createEmptyCib(0); + crm_warn("Continuing with an empty configuration."); + } + + if (cib_writes_enabled && use_valgrind && + (crm_is_true(use_valgrind) || strstr(use_valgrind, "pacemaker-based"))) { + + cib_writes_enabled = FALSE; + crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); + } + + status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE); + 
if (discard_status && status != NULL) { + /* strip out the status section if there is one */ + free_xml(status); + status = NULL; + } + if (status == NULL) { + create_xml_node(root, XML_CIB_TAG_STATUS); + } + + /* Do this before schema validation happens */ + + /* fill in some defaults */ + name = XML_ATTR_GENERATION_ADMIN; + value = crm_element_value(root, name); + if (value == NULL) { + crm_warn("No value for %s was specified in the configuration.", name); + crm_warn("The recommended course of action is to shutdown," + " run crm_verify and fix any errors it reports."); + crm_warn("We will default to zero and continue but may get" + " confused about which configuration to use if" + " multiple nodes are powered up at the same time."); + crm_xml_add_int(root, name, 0); + } + + name = XML_ATTR_GENERATION; + value = crm_element_value(root, name); + if (value == NULL) { + crm_xml_add_int(root, name, 0); + } + + name = XML_ATTR_NUMUPDATES; + value = crm_element_value(root, name); + if (value == NULL) { + crm_xml_add_int(root, name, 0); + } + + // Unset (DC should set appropriate value) + xml_remove_prop(root, XML_ATTR_DC_UUID); + + if (discard_status) { + crm_log_xml_trace(root, "[on-disk]"); + } + + validation = crm_element_value(root, XML_ATTR_VALIDATION); + if (validate_xml(root, NULL, TRUE) == FALSE) { + crm_err("CIB does not validate with %s", + pcmk__s(validation, "no schema specified")); + cib_status = -pcmk_err_schema_validation; + + } else if (validation == NULL) { + int version = 0; + + update_validation(&root, &version, 0, FALSE, FALSE); + if (version > 0) { + crm_notice("Enabling %s validation on" + " the existing (sane) configuration", get_schema_name(version)); + } else { + crm_err("CIB does not validate with any known schema"); + cib_status = -pcmk_err_schema_validation; + } + } + + return root; +} + +gboolean +uninitializeCib(void) +{ + xmlNode *tmp_cib = the_cib; + + if (tmp_cib == NULL) { + crm_debug("The CIB has already been deallocated."); + return FALSE; + } + + the_cib = NULL; + + crm_debug("Deallocating the CIB."); + + free_xml(tmp_cib); + + crm_debug("The CIB has been deallocated."); + + return TRUE; +} + +/* + * This method will free the old CIB pointer on success and the new one + * on failure. + */ +int +activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) +{ + if (new_cib) { + xmlNode *saved_cib = the_cib; + + CRM_ASSERT(new_cib != saved_cib); + the_cib = new_cib; + free_xml(saved_cib); + if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) { + crm_debug("Triggering CIB write for %s op", op); + mainloop_set_trigger(cib_writer); + } + return pcmk_ok; + } + + crm_err("Ignoring invalid CIB"); + if (the_cib) { + crm_warn("Reverting to last known CIB"); + } else { + crm_crit("Could not write out new CIB and no saved version to revert to"); + } + return -ENODATA; +} + +static void +cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) +{ + const char *errmsg = "Could not write CIB to disk"; + + if ((exitcode != 0) && cib_writes_enabled) { + cib_writes_enabled = FALSE; + errmsg = "Disabling CIB disk writes after failure"; + } + + if ((signo == 0) && (exitcode == 0)) { + crm_trace("Disk write [%d] succeeded", (int) pid); + + } else if (signo == 0) { + crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode); + + } else { + crm_err("%s: process %d terminated with signal %d (%s)%s", + errmsg, (int) pid, signo, strsignal(signo), + (core? 
" and dumped core" : "")); + } + + mainloop_trigger_complete(cib_writer); +} + +int +write_cib_contents(gpointer p) +{ + int exit_rc = pcmk_ok; + xmlNode *cib_local = NULL; + + /* Make a copy of the CIB to write (possibly in a forked child) */ + if (p) { + /* Synchronous write out */ + cib_local = copy_xml(p); + + } else { + int pid = 0; + int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0); + + /* Turn it off before the fork() to avoid: + * - 2 processes writing to the same shared mem + * - the child needing to disable it + * (which would close it from underneath the parent) + * This way, the shared mem files are already closed + */ + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); + + pid = fork(); + if (pid < 0) { + crm_perror(LOG_ERR, "Disabling disk writes after fork failure"); + cib_writes_enabled = FALSE; + return FALSE; + } + + if (pid) { + /* Parent */ + mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); + if (bb_state == QB_LOG_STATE_ENABLED) { + /* Re-enable now that it it safe */ + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); + } + + return -1; /* -1 means 'still work to do' */ + } + + /* Asynchronous write-out after a fork() */ + + /* In theory, we can scribble on the_cib here and not affect the parent, + * but let's be safe anyway. + */ + cib_local = copy_xml(the_cib); + } + + /* Write the CIB */ + exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml"); + + /* A nonzero exit code will cause further writes to be disabled */ + free_xml(cib_local); + if (p == NULL) { + crm_exit_t exit_code = CRM_EX_OK; + + switch (exit_rc) { + case pcmk_ok: + exit_code = CRM_EX_OK; + break; + case pcmk_err_cib_modified: + exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest + break; + case pcmk_err_cib_backup: // Existing CIB couldn't be backed up + case pcmk_err_cib_save: // New CIB couldn't be saved + exit_code = CRM_EX_CANTCREAT; + break; + default: + exit_code = CRM_EX_ERROR; + break; + } + + /* Use _exit() because exit() could affect the parent adversely */ + _exit(exit_code); + } + return exit_rc; +} diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c new file mode 100644 index 0000000..d46456c --- /dev/null +++ b/daemons/based/based_messages.c @@ -0,0 +1,427 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +/* Maximum number of diffs to ignore while waiting for a resync */ +#define MAX_DIFF_RETRY 5 + +bool based_is_primary = false; + +xmlNode *the_cib = NULL; + +int +cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + const char *host = crm_element_value(req, F_ORIG); + + *answer = NULL; + + if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { + crm_info("Peer %s is requesting to shut down", host); + return pcmk_ok; + } + + if (cib_shutdown_flag == FALSE) { + crm_err("Peer %s mistakenly thinks we wanted to shut down", host); + return -EINVAL; + } + + crm_info("Peer %s has acknowledged our shutdown request", host); + terminate_cib(__func__, 0); + return pcmk_ok; +} + +int +cib_process_default(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + int result = pcmk_ok; + + crm_trace("Processing \"%s\" event", op); + *answer = NULL; + + if (op == NULL) { + result = -EINVAL; + crm_err("No operation specified"); + + } else if (strcmp(PCMK__CIB_REQUEST_NOOP, op) != 0) { + result = -EPROTONOSUPPORT; + crm_err("Action [%s] is not supported by the CIB manager", op); + } + return result; +} + +int +cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + int result = pcmk_ok; + + crm_trace("Processing \"%s\" event", op); + + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) { + if (based_is_primary) { + result = pcmk_ok; + } else { + result = -EPERM; + } + return result; + } + + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) { + if (!based_is_primary) { + crm_info("We are now in R/W mode"); + based_is_primary = true; + } else { + crm_debug("We are still in R/W mode"); + } + + } else if (based_is_primary) { + crm_info("We are now in R/O mode"); + based_is_primary = false; + } + + return result; +} + +/* Set to 1 when a sync is requested, incremented when a diff is ignored, + * reset to 0 when a sync is received + */ +static int sync_in_progress = 0; + +void +send_sync_request(const char *host) +{ + xmlNode *sync_me = create_xml_node(NULL, "sync-me"); + + crm_info("Requesting re-sync from %s", (host? host : "all peers")); + sync_in_progress = 1; + + crm_xml_add(sync_me, F_TYPE, "cib"); + crm_xml_add(sync_me, F_CIB_OPERATION, PCMK__CIB_REQUEST_SYNC_TO_ONE); + crm_xml_add(sync_me, F_CIB_DELEGATED, + stand_alone? "localhost" : crm_cluster->uname); + + send_cluster_message(host ? 
crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); +} + +int +cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, + xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) +{ + const char *host = crm_element_value(req, F_ORIG); + const char *seq = crm_element_value(req, F_CIB_PING_ID); + char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); + + crm_trace("Processing \"%s\" event %s from %s", op, seq, host); + *answer = create_xml_node(NULL, XML_CRM_TAG_PING); + + crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + crm_xml_add(*answer, XML_ATTR_DIGEST, digest); + crm_xml_add(*answer, F_CIB_PING_ID, seq); + + pcmk__if_tracing( + { + // Append additional detail so the receiver can log the differences + add_message_xml(*answer, F_CIB_CALLDATA, the_cib); + }, + { + // Always include at least the version details + const char *tag = TYPE(the_cib); + xmlNode *shallow = create_xml_node(NULL, tag); + + copy_in_properties(shallow, the_cib); + add_message_xml(*answer, F_CIB_CALLDATA, shallow); + free_xml(shallow); + } + ); + + crm_info("Reporting our current digest to %s: %s for %s.%s.%s", + host, digest, + crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(existing_cib, XML_ATTR_GENERATION), + crm_element_value(existing_cib, XML_ATTR_NUMUPDATES)); + + free(digest); + + return pcmk_ok; +} + +int +cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, + xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) +{ + return sync_our_cib(req, TRUE); +} + +int +cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, + xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) +{ + int rc = pcmk_ok; + + *answer = NULL; + + if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { + /* The originator of an upgrade request sends it to the DC, without + * F_CIB_SCHEMA_MAX. If an upgrade is needed, the DC re-broadcasts the + * request with F_CIB_SCHEMA_MAX, and each node performs the upgrade + * (and notifies its local clients) here. 
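+     *
+     * (Annotation, not upstream text) Illustrative flow, where N1 is any
+     * node originating an upgrade request:
+     *   1. N1 sends the request to the DC without F_CIB_SCHEMA_MAX
+     *   2. if an upgrade is actually needed, the DC re-broadcasts the
+     *      request with F_CIB_SCHEMA_MAX set
+     *   3. every node, N1 included, then takes this branch and upgrades
+     *      its local copy, notifying its own clients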
+ */ + return cib_process_upgrade( + op, options, section, req, input, existing_cib, result_cib, answer); + + } else { + int new_version = 0; + int current_version = 0; + xmlNode *scratch = copy_xml(existing_cib); + const char *host = crm_element_value(req, F_ORIG); + const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); + const char *client_id = crm_element_value(req, F_CIB_CLIENTID); + const char *call_opts = crm_element_value(req, F_CIB_CALLOPTS); + const char *call_id = crm_element_value(req, F_CIB_CALLID); + + crm_trace("Processing \"%s\" event", op); + if (value != NULL) { + current_version = get_schema_version(value); + } + + rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE); + if (new_version > current_version) { + xmlNode *up = create_xml_node(NULL, __func__); + + rc = pcmk_ok; + crm_notice("Upgrade request from %s verified", host); + + crm_xml_add(up, F_TYPE, "cib"); + crm_xml_add(up, F_CIB_OPERATION, PCMK__CIB_REQUEST_UPGRADE); + crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); + crm_xml_add(up, F_CIB_DELEGATED, host); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); + + if (cib_legacy_mode() && based_is_primary) { + rc = cib_process_upgrade( + op, options, section, up, input, existing_cib, result_cib, answer); + + } else { + send_cluster_message(NULL, crm_msg_cib, up, FALSE); + } + + free_xml(up); + + } else if(rc == pcmk_ok) { + rc = -pcmk_err_schema_unchanged; + } + + if (rc != pcmk_ok) { + // Notify originating peer so it can notify its local clients + crm_node_t *origin = pcmk__search_cluster_node_cache(0, host); + + crm_info("Rejecting upgrade request from %s: %s " + CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, + (origin? origin->uname : "lost")); + + if (origin) { + xmlNode *up = create_xml_node(NULL, __func__); + + crm_xml_add(up, F_TYPE, "cib"); + crm_xml_add(up, F_CIB_OPERATION, PCMK__CIB_REQUEST_UPGRADE); + crm_xml_add(up, F_CIB_DELEGATED, host); + crm_xml_add(up, F_CIB_ISREPLY, host); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); + crm_xml_add_int(up, F_CIB_UPGRADE_RC, rc); + if (send_cluster_message(origin, crm_msg_cib, up, TRUE) + == FALSE) { + crm_warn("Could not send CIB upgrade result to %s", host); + } + free_xml(up); + } + } + free_xml(scratch); + } + return rc; +} + +int +cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + return sync_our_cib(req, FALSE); +} + +int +cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + int rc = pcmk_ok; + + if (sync_in_progress > MAX_DIFF_RETRY) { + /* Don't ignore diffs forever; the last request may have been lost. + * If the diff fails, we'll ask for another full resync. 
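+     *
+     * (Annotation, not upstream text) In other words: sync_in_progress
+     * counts diffs ignored while a resync is pending; once it exceeds
+     * MAX_DIFF_RETRY (5) it is reset here, the next diff is applied
+     * normally, and a fresh resync is requested only if that apply fails.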
+ */ + sync_in_progress = 0; + } + + // The primary instance should never ignore a diff + if (sync_in_progress && !based_is_primary) { + int diff_add_updates = 0; + int diff_add_epoch = 0; + int diff_add_admin_epoch = 0; + + int diff_del_updates = 0; + int diff_del_epoch = 0; + int diff_del_admin_epoch = 0; + + cib_diff_version_details(input, + &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, + &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); + + sync_in_progress++; + crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", + diff_del_admin_epoch, diff_del_epoch, diff_del_updates, + diff_add_admin_epoch, diff_add_epoch, diff_add_updates); + return -pcmk_err_diff_resync; + } + + rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); + crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc, + (based_is_primary? "primary": "secondary")); + + if ((rc == -pcmk_err_diff_resync) && !based_is_primary) { + free_xml(*result_cib); + *result_cib = NULL; + send_sync_request(NULL); + + } else if (rc == -pcmk_err_diff_resync) { + rc = -pcmk_err_diff_failed; + if (options & cib_force_diff) { + crm_warn("Not requesting full refresh in R/W mode"); + } + + } else if ((rc != pcmk_ok) && !based_is_primary && cib_legacy_mode()) { + crm_warn("Requesting full CIB refresh because update failed: %s" + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + + pcmk__output_set_log_level(logger_out, LOG_INFO); + logger_out->message(logger_out, "xml-patchset", input); + free_xml(*result_cib); + *result_cib = NULL; + send_sync_request(NULL); + } + + return rc; +} + +int +cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + const char *tag = crm_element_name(input); + int rc = + cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); + if (rc == pcmk_ok && pcmk__str_eq(tag, XML_TAG_CIB, pcmk__str_casei)) { + sync_in_progress = 0; + } + return rc; +} + +int +cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, + xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, + xmlNode ** answer) +{ + return -EINVAL; +} + +int +sync_our_cib(xmlNode * request, gboolean all) +{ + int result = pcmk_ok; + char *digest = NULL; + const char *host = crm_element_value(request, F_ORIG); + const char *op = crm_element_value(request, F_CIB_OPERATION); + + xmlNode *replace_request = NULL; + + CRM_CHECK(the_cib != NULL, return -EINVAL); + + replace_request = cib_msg_copy(request, FALSE); + CRM_CHECK(replace_request != NULL, return -EINVAL); + + crm_debug("Syncing CIB to %s", all ? 
"all peers" : host); + if (all == FALSE && host == NULL) { + crm_log_xml_err(request, "bad sync"); + } + + /* remove the "all == FALSE" condition + * + * sync_from was failing, the local client wasn't being notified + * because it didn't know it was a reply + * setting this does not prevent the other nodes from applying it + * if all == TRUE + */ + if (host != NULL) { + crm_xml_add(replace_request, F_CIB_ISREPLY, host); + } + if (all) { + xml_remove_prop(replace_request, F_CIB_HOST); + } + + crm_xml_add(replace_request, F_CIB_OPERATION, PCMK__CIB_REQUEST_REPLACE); + crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); + pcmk__xe_set_bool_attr(replace_request, F_CIB_GLOBAL_UPDATE, true); + + crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); + crm_xml_add(replace_request, XML_ATTR_DIGEST, digest); + + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + + if (send_cluster_message + (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { + result = -ENOTCONN; + } + free_xml(replace_request); + free(digest); + return result; +} diff --git a/daemons/based/based_notify.c b/daemons/based/based_notify.c new file mode 100644 index 0000000..5881f6d --- /dev/null +++ b/daemons/based/based_notify.c @@ -0,0 +1,305 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include // PRIx64 + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include + +struct cib_notification_s { + xmlNode *msg; + struct iovec *iov; + int32_t iov_size; +}; + +static void +cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) +{ + const char *type = NULL; + gboolean do_send = FALSE; + int rc = pcmk_rc_ok; + + pcmk__client_t *client = value; + struct cib_notification_s *update = user_data; + + if (client->ipcs == NULL && client->remote == NULL) { + crm_warn("Skipping client with NULL channel"); + return; + } + + type = crm_element_value(update->msg, F_SUBTYPE); + CRM_LOG_ASSERT(type != NULL); + + if (pcmk_is_set(client->flags, cib_notify_diff) + && pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) { + + do_send = TRUE; + + } else if (pcmk_is_set(client->flags, cib_notify_replace) + && pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { + do_send = TRUE; + + } else if (pcmk_is_set(client->flags, cib_notify_confirm) + && pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) { + do_send = TRUE; + + } else if (pcmk_is_set(client->flags, cib_notify_pre) + && pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) { + do_send = TRUE; + + } else if (pcmk_is_set(client->flags, cib_notify_post) + && pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) { + + do_send = TRUE; + } + + if (do_send) { + switch (PCMK__CLIENT_TYPE(client)) { + case pcmk__client_ipc: + rc = pcmk__ipc_send_iov(client, update->iov, + crm_ipc_server_event); + if (rc != pcmk_rc_ok) { + crm_warn("Could not notify client %s: %s " CRM_XS " id=%s", + pcmk__client_name(client), pcmk_rc_str(rc), + client->id); + } + break; +#ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: +#endif + case pcmk__client_tcp: + crm_debug("Sent %s notification to client %s (id %s)", + 
type, pcmk__client_name(client), client->id); + pcmk__remote_send_xml(client->remote, update->msg); + break; + default: + crm_err("Unknown transport for client %s " + CRM_XS " flags=%#016" PRIx64, + pcmk__client_name(client), client->flags); + } + } +} + +static void +cib_notify_send(xmlNode * xml) +{ + struct iovec *iov; + struct cib_notification_s update; + + ssize_t bytes = 0; + int rc = pcmk__ipc_prepare_iov(0, xml, 0, &iov, &bytes); + + if (rc == pcmk_rc_ok) { + update.msg = xml; + update.iov = iov; + update.iov_size = bytes; + pcmk__foreach_ipc_client(cib_notify_send_one, &update); + + } else { + crm_notice("Could not notify clients: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + pcmk_free_ipc_event(iov); +} + +static void +attach_cib_generation(xmlNode *msg, const char *field, xmlNode *a_cib) +{ + xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); + + if (a_cib != NULL) { + copy_in_properties(generation, a_cib); + } + add_message_xml(msg, field, generation); + free_xml(generation); +} + +void +cib_diff_notify(const char *op, int result, const char *call_id, + const char *client_id, const char *client_name, + const char *origin, xmlNode *update, xmlNode *diff) +{ + int add_updates = 0; + int add_epoch = 0; + int add_admin_epoch = 0; + + int del_updates = 0; + int del_epoch = 0; + int del_admin_epoch = 0; + + uint8_t log_level = LOG_TRACE; + + xmlNode *update_msg = NULL; + const char *type = NULL; + + if (diff == NULL) { + return; + } + + if (result != pcmk_ok) { + log_level = LOG_WARNING; + } + + cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, + &del_admin_epoch, &del_epoch, &del_updates); + + if ((add_admin_epoch != del_admin_epoch) + || (add_epoch != del_epoch) + || (add_updates != del_updates)) { + + do_crm_log(log_level, + "Updated CIB generation %d.%d.%d to %d.%d.%d from client " + "%s%s%s (%s) (%s)", + del_admin_epoch, del_epoch, del_updates, + add_admin_epoch, add_epoch, add_updates, + client_name, + ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), + pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); + + } else if ((add_admin_epoch != 0) + || (add_epoch != 0) + || (add_updates != 0)) { + + do_crm_log(log_level, + "Local-only change to CIB generation %d.%d.%d from client " + "%s%s%s (%s) (%s)", + add_admin_epoch, add_epoch, add_updates, + client_name, + ((call_id != NULL)? 
" call " : ""), pcmk__s(call_id, ""), + pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); + } + + update_msg = create_xml_node(NULL, "notify"); + + crm_xml_add(update_msg, F_TYPE, T_CIB_NOTIFY); + crm_xml_add(update_msg, F_SUBTYPE, T_CIB_DIFF_NOTIFY); + crm_xml_add(update_msg, F_CIB_OPERATION, op); + crm_xml_add(update_msg, F_CIB_CLIENTID, client_id); + crm_xml_add(update_msg, F_CIB_CALLID, call_id); + crm_xml_add(update_msg, F_ORIG, origin); + crm_xml_add_int(update_msg, F_CIB_RC, result); + + if (update != NULL) { + type = crm_element_name(update); + crm_trace("Setting type to update->name: %s", type); + } else { + type = crm_element_name(diff); + crm_trace("Setting type to new_obj->name: %s", type); + } + crm_xml_add(update_msg, F_CIB_OBJID, ID(diff)); + crm_xml_add(update_msg, F_CIB_OBJTYPE, type); + attach_cib_generation(update_msg, "cib_generation", the_cib); + + if (update != NULL) { + add_message_xml(update_msg, F_CIB_UPDATE, update); + } + add_message_xml(update_msg, F_CIB_UPDATE_RESULT, diff); + + cib_notify_send(update_msg); + free_xml(update_msg); +} + +void +cib_replace_notify(const char *op, int result, const char *call_id, + const char *client_id, const char *client_name, + const char *origin, xmlNode *update, xmlNode *diff, + uint32_t change_section) +{ + xmlNode *replace_msg = NULL; + + int add_updates = 0; + int add_epoch = 0; + int add_admin_epoch = 0; + + int del_updates = 0; + int del_epoch = 0; + int del_admin_epoch = 0; + + uint8_t log_level = LOG_INFO; + + if (diff == NULL) { + return; + } + + if (result != pcmk_ok) { + log_level = LOG_WARNING; + } + + cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, + &del_admin_epoch, &del_epoch, &del_updates); + + if (del_updates < 0) { + crm_log_xml_debug(diff, "Bad replace diff"); + } + + if ((add_admin_epoch != del_admin_epoch) + || (add_epoch != del_epoch) + || (add_updates != del_updates)) { + + do_crm_log(log_level, + "Replaced CIB generation %d.%d.%d with %d.%d.%d from client " + "%s%s%s (%s) (%s)", + del_admin_epoch, del_epoch, del_updates, + add_admin_epoch, add_epoch, add_updates, + client_name, + ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), + pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); + + } else if ((add_admin_epoch != 0) + || (add_epoch != 0) + || (add_updates != 0)) { + + do_crm_log(log_level, + "Local-only replace of CIB generation %d.%d.%d from client " + "%s%s%s (%s) (%s)", + add_admin_epoch, add_epoch, add_updates, + client_name, + ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), + pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); + } + + replace_msg = create_xml_node(NULL, "notify-replace"); + + crm_xml_add(replace_msg, F_TYPE, T_CIB_NOTIFY); + crm_xml_add(replace_msg, F_SUBTYPE, T_CIB_REPLACE_NOTIFY); + crm_xml_add(replace_msg, F_CIB_OPERATION, op); + crm_xml_add(replace_msg, F_CIB_CLIENTID, client_id); + crm_xml_add(replace_msg, F_CIB_CALLID, call_id); + crm_xml_add(replace_msg, F_ORIG, origin); + crm_xml_add_int(replace_msg, F_CIB_RC, result); + crm_xml_add_ll(replace_msg, F_CIB_CHANGE_SECTION, + (long long) change_section); + attach_cib_generation(replace_msg, "cib-replace-generation", update); + + /* We can include update and diff if a replace callback needs them. Until + * then, avoid the overhead. 
+ */ + + crm_log_xml_trace(replace_msg, "CIB replaced"); + + cib_notify_send(replace_msg); + free_xml(replace_msg); +} diff --git a/daemons/based/based_remote.c b/daemons/based/based_remote.c new file mode 100644 index 0000000..38136d2 --- /dev/null +++ b/daemons/based/based_remote.c @@ -0,0 +1,680 @@ +/* + * Copyright 2004-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include // PRIx64 +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pacemaker-based.h" + +/* #undef HAVE_PAM_PAM_APPL_H */ +/* #undef HAVE_GNUTLS_GNUTLS_H */ + +#ifdef HAVE_GNUTLS_GNUTLS_H +# include +#endif + +#include +#include +#if HAVE_SECURITY_PAM_APPL_H +# include +# define HAVE_PAM 1 +#else +# if HAVE_PAM_PAM_APPL_H +# include +# define HAVE_PAM 1 +# endif +#endif + +extern int remote_tls_fd; +extern gboolean cib_shutdown_flag; + +int init_remote_listener(int port, gboolean encrypted); +void cib_remote_connection_destroy(gpointer user_data); + +#ifdef HAVE_GNUTLS_GNUTLS_H +gnutls_dh_params_t dh_params; +gnutls_anon_server_credentials_t anon_cred_s; +static void +debug_log(int level, const char *str) +{ + fputs(str, stderr); +} +#endif + +#define REMOTE_AUTH_TIMEOUT 10000 + +int num_clients; +int authenticate_user(const char *user, const char *passwd); +static int cib_remote_listen(gpointer data); +static int cib_remote_msg(gpointer data); + +static void +remote_connection_destroy(gpointer user_data) +{ + crm_info("No longer listening for remote connections"); + return; +} + +int +init_remote_listener(int port, gboolean encrypted) +{ + int rc; + int *ssock = NULL; + struct sockaddr_in saddr; + int optval; + + static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { + .dispatch = cib_remote_listen, + .destroy = remote_connection_destroy, + }; + + if (port <= 0) { + /* don't start it */ + return 0; + } + + if (encrypted) { +#ifndef HAVE_GNUTLS_GNUTLS_H + crm_warn("TLS support is not available"); + return 0; +#else + crm_notice("Starting TLS listener on port %d", port); + crm_gnutls_global_init(); + /* gnutls_global_set_log_level (10); */ + gnutls_global_set_log_function(debug_log); + if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) { + return -1; + } + gnutls_anon_allocate_server_credentials(&anon_cred_s); + gnutls_anon_set_server_dh_params(anon_cred_s, dh_params); +#endif + } else { + crm_warn("Starting plain-text listener on port %d", port); + } +#ifndef HAVE_PAM + crm_warn("PAM is _not_ enabled!"); +#endif + + /* create server socket */ + ssock = malloc(sizeof(int)); + if(ssock == NULL) { + crm_perror(LOG_ERR, "Listener socket allocation failed"); + return -1; + } + + *ssock = socket(AF_INET, SOCK_STREAM, 0); + if (*ssock == -1) { + crm_perror(LOG_ERR, "Listener socket creation failed"); + free(ssock); + return -1; + } + + /* reuse address */ + optval = 1; + rc = setsockopt(*ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); + if (rc < 0) { + crm_perror(LOG_WARNING, + "Local address reuse not allowed on listener socket"); + } + + /* bind server socket */ + memset(&saddr, '\0', sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = INADDR_ANY; + saddr.sin_port = htons(port); + if (bind(*ssock, (struct sockaddr *)&saddr, 
sizeof(saddr)) == -1) { + crm_perror(LOG_ERR, "Cannot bind to listener socket"); + close(*ssock); + free(ssock); + return -2; + } + if (listen(*ssock, 10) == -1) { + crm_perror(LOG_ERR, "Cannot listen on socket"); + close(*ssock); + free(ssock); + return -3; + } + + mainloop_add_fd("cib-remote", G_PRIORITY_DEFAULT, *ssock, ssock, &remote_listen_fd_callbacks); + crm_debug("Started listener on port %d", port); + + return *ssock; +} + +static int +check_group_membership(const char *usr, const char *grp) +{ + int index = 0; + struct passwd *pwd = NULL; + struct group *group = NULL; + + CRM_CHECK(usr != NULL, return FALSE); + CRM_CHECK(grp != NULL, return FALSE); + + pwd = getpwnam(usr); + if (pwd == NULL) { + crm_err("No user named '%s' exists!", usr); + return FALSE; + } + + group = getgrgid(pwd->pw_gid); + if (group != NULL && pcmk__str_eq(grp, group->gr_name, pcmk__str_none)) { + return TRUE; + } + + group = getgrnam(grp); + if (group == NULL) { + crm_err("No group named '%s' exists!", grp); + return FALSE; + } + + while (TRUE) { + char *member = group->gr_mem[index++]; + + if (member == NULL) { + break; + + } else if (pcmk__str_eq(usr, member, pcmk__str_none)) { + return TRUE; + } + }; + + return FALSE; +} + +static gboolean +cib_remote_auth(xmlNode * login) +{ + const char *user = NULL; + const char *pass = NULL; + const char *tmp = NULL; + + crm_log_xml_info(login, "Login: "); + if (login == NULL) { + return FALSE; + } + + tmp = crm_element_name(login); + if (!pcmk__str_eq(tmp, "cib_command", pcmk__str_casei)) { + crm_err("Wrong tag: %s", tmp); + return FALSE; + } + + tmp = crm_element_value(login, "op"); + if (!pcmk__str_eq(tmp, "authenticate", pcmk__str_casei)) { + crm_err("Wrong operation: %s", tmp); + return FALSE; + } + + user = crm_element_value(login, "user"); + pass = crm_element_value(login, "password"); + + if (!user || !pass) { + crm_err("missing auth credentials"); + return FALSE; + } + + /* Non-root daemons can only validate the password of the + * user they're running as + */ + if (check_group_membership(user, CRM_DAEMON_GROUP) == FALSE) { + crm_err("User is not a member of the required group"); + return FALSE; + + } else if (authenticate_user(user, pass) == FALSE) { + crm_err("PAM auth failed"); + return FALSE; + } + + return TRUE; +} + +static gboolean +remote_auth_timeout_cb(gpointer data) +{ + pcmk__client_t *client = data; + + client->remote->auth_timeout = 0; + + if (pcmk_is_set(client->flags, pcmk__client_authenticated)) { + return FALSE; + } + + mainloop_del_fd(client->remote->source); + crm_err("Remote client authentication timed out"); + + return FALSE; +} + +static int +cib_remote_listen(gpointer data) +{ + int csock = 0; + unsigned laddr; + struct sockaddr_storage addr; + char ipstr[INET6_ADDRSTRLEN]; + int ssock = *(int *)data; + int rc; + + pcmk__client_t *new_client = NULL; + + static struct mainloop_fd_callbacks remote_client_fd_callbacks = { + .dispatch = cib_remote_msg, + .destroy = cib_remote_connection_destroy, + }; + + /* accept the connection */ + laddr = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + csock = accept(ssock, (struct sockaddr *)&addr, &laddr); + if (csock == -1) { + crm_perror(LOG_ERR, "Could not accept socket connection"); + return TRUE; + } + + pcmk__sockaddr2str(&addr, ipstr); + crm_debug("New %s connection from %s", + ((ssock == remote_tls_fd)? 
"secure" : "clear-text"), ipstr); + + rc = pcmk__set_nonblocking(csock); + if (rc != pcmk_rc_ok) { + crm_err("Could not set socket non-blocking: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + close(csock); + return TRUE; + } + + num_clients++; + + new_client = pcmk__new_unauth_client(NULL); + new_client->remote = calloc(1, sizeof(pcmk__remote_t)); + + if (ssock == remote_tls_fd) { +#ifdef HAVE_GNUTLS_GNUTLS_H + pcmk__set_client_flags(new_client, pcmk__client_tls); + + /* create gnutls session for the server socket */ + new_client->remote->tls_session = pcmk__new_tls_session(csock, + GNUTLS_SERVER, + GNUTLS_CRD_ANON, + anon_cred_s); + if (new_client->remote->tls_session == NULL) { + close(csock); + return TRUE; + } +#endif + } else { + pcmk__set_client_flags(new_client, pcmk__client_tcp); + new_client->remote->tcp_socket = csock; + } + + // Require the client to authenticate within this time + new_client->remote->auth_timeout = g_timeout_add(REMOTE_AUTH_TIMEOUT, + remote_auth_timeout_cb, + new_client); + crm_info("Remote CIB client pending authentication " + CRM_XS " %p id: %s", new_client, new_client->id); + + new_client->remote->source = + mainloop_add_fd("cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client, + &remote_client_fd_callbacks); + + return TRUE; +} + +void +cib_remote_connection_destroy(gpointer user_data) +{ + pcmk__client_t *client = user_data; + int csock = 0; + + if (client == NULL) { + return; + } + + crm_trace("Cleaning up after client %s disconnect", + pcmk__client_name(client)); + + num_clients--; + crm_trace("Num unfree'd clients: %d", num_clients); + + switch (PCMK__CLIENT_TYPE(client)) { + case pcmk__client_tcp: + csock = client->remote->tcp_socket; + break; +#ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: + if (client->remote->tls_session) { + void *sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); + + csock = GPOINTER_TO_INT(sock_ptr); + if (pcmk_is_set(client->flags, + pcmk__client_tls_handshake_complete)) { + gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_WR); + } + gnutls_deinit(*client->remote->tls_session); + gnutls_free(client->remote->tls_session); + client->remote->tls_session = NULL; + } + break; +#endif + default: + crm_warn("Unknown transport for client %s " + CRM_XS " flags=%#016" PRIx64, + pcmk__client_name(client), client->flags); + } + + if (csock > 0) { + close(csock); + } + + pcmk__free_client(client); + + crm_trace("Freed the cib client"); + + if (cib_shutdown_flag) { + cib_shutdown(0); + } + return; +} + +static void +cib_handle_remote_msg(pcmk__client_t *client, xmlNode *command) +{ + const char *value = NULL; + + value = crm_element_name(command); + if (!pcmk__str_eq(value, "cib_command", pcmk__str_casei)) { + crm_log_xml_trace(command, "Bad command: "); + return; + } + + if (client->name == NULL) { + value = crm_element_value(command, F_CLIENTNAME); + if (value == NULL) { + client->name = strdup(client->id); + } else { + client->name = strdup(value); + } + } + + /* unset dangerous options */ + xml_remove_prop(command, F_ORIG); + xml_remove_prop(command, F_CIB_HOST); + xml_remove_prop(command, F_CIB_GLOBAL_UPDATE); + + crm_xml_add(command, F_TYPE, T_CIB); + crm_xml_add(command, F_CIB_CLIENTID, client->id); + crm_xml_add(command, F_CIB_CLIENTNAME, client->name); + crm_xml_add(command, F_CIB_USER, client->user); + + if (crm_element_value(command, F_CIB_CALLID) == NULL) { + char *call_uuid = crm_generate_uuid(); + + /* fix the command */ + crm_xml_add(command, F_CIB_CALLID, call_uuid); + free(call_uuid); + } + + 
if (crm_element_value(command, F_CIB_CALLOPTS) == NULL) { + crm_xml_add_int(command, F_CIB_CALLOPTS, 0); + } + + crm_log_xml_trace(command, "Remote command: "); + cib_common_callback_worker(0, 0, command, client, TRUE); +} + +static int +cib_remote_msg(gpointer data) +{ + xmlNode *command = NULL; + pcmk__client_t *client = data; + int rc; + int timeout = 1000; + + if (pcmk_is_set(client->flags, pcmk__client_authenticated)) { + timeout = -1; + } + + crm_trace("Remote %s message received for client %s", + pcmk__client_type_str(PCMK__CLIENT_TYPE(client)), + pcmk__client_name(client)); + +#ifdef HAVE_GNUTLS_GNUTLS_H + if ((PCMK__CLIENT_TYPE(client) == pcmk__client_tls) + && !pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { + + int rc = pcmk__read_handshake_data(client); + + if (rc == EAGAIN) { + /* No more data is available at the moment. Just return for now; + * we'll get invoked again once the client sends more. + */ + return 0; + } else if (rc != pcmk_rc_ok) { + return -1; + } + + crm_debug("TLS handshake with remote CIB client completed"); + pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); + if (client->remote->auth_timeout) { + g_source_remove(client->remote->auth_timeout); + } + + // Require the client to authenticate within this time + client->remote->auth_timeout = g_timeout_add(REMOTE_AUTH_TIMEOUT, + remote_auth_timeout_cb, + client); + return 0; + } +#endif + + rc = pcmk__read_remote_message(client->remote, timeout); + + /* must pass auth before we will process anything else */ + if (!pcmk_is_set(client->flags, pcmk__client_authenticated)) { + xmlNode *reg; + const char *user = NULL; + + command = pcmk__remote_message_xml(client->remote); + if (cib_remote_auth(command) == FALSE) { + free_xml(command); + return -1; + } + + crm_notice("Remote CIB client connection accepted"); + pcmk__set_client_flags(client, pcmk__client_authenticated); + g_source_remove(client->remote->auth_timeout); + client->remote->auth_timeout = 0; + client->name = crm_element_value_copy(command, "name"); + + user = crm_element_value(command, "user"); + if (user) { + client->user = strdup(user); + } + + /* send ACK */ + reg = create_xml_node(NULL, "cib_result"); + crm_xml_add(reg, F_CIB_OPERATION, CRM_OP_REGISTER); + crm_xml_add(reg, F_CIB_CLIENTID, client->id); + pcmk__remote_send_xml(client->remote, reg); + free_xml(reg); + free_xml(command); + } + + command = pcmk__remote_message_xml(client->remote); + while (command) { + crm_trace("Remote client message received"); + cib_handle_remote_msg(client, command); + free_xml(command); + command = pcmk__remote_message_xml(client->remote); + } + + if (rc == ENOTCONN) { + crm_trace("Remote CIB client disconnected while reading from it"); + return -1; + } + + return 0; +} + +#ifdef HAVE_PAM +static int +construct_pam_passwd(int num_msg, const struct pam_message **msg, + struct pam_response **response, void *data) +{ + int count = 0; + struct pam_response *reply; + char *string = (char *)data; + + CRM_CHECK(data, return PAM_CONV_ERR); + CRM_CHECK(num_msg == 1, return PAM_CONV_ERR); /* We only want to handle one message */ + + reply = calloc(1, sizeof(struct pam_response)); + CRM_ASSERT(reply != NULL); + + for (count = 0; count < num_msg; ++count) { + switch (msg[count]->msg_style) { + case PAM_TEXT_INFO: + crm_info("PAM: %s", msg[count]->msg); + break; + case PAM_PROMPT_ECHO_OFF: + case PAM_PROMPT_ECHO_ON: + reply[count].resp_retcode = 0; + reply[count].resp = string; /* We already made a copy */ + break; + case PAM_ERROR_MSG: + /* In 
theory we'd want to print this, but then + * we see the password prompt in the logs + */ + /* crm_err("PAM error: %s", msg[count]->msg); */ + break; + default: + crm_err("Unhandled conversation type: %d", msg[count]->msg_style); + goto bail; + } + } + + *response = reply; + reply = NULL; + + return PAM_SUCCESS; + + bail: + for (count = 0; count < num_msg; ++count) { + if (reply[count].resp != NULL) { + switch (msg[count]->msg_style) { + case PAM_PROMPT_ECHO_ON: + case PAM_PROMPT_ECHO_OFF: + /* Erase the data - it contained a password. Walk a copy of + * the pointer so that free() still gets the original address. + */ + { + char *p = reply[count].resp; + + while (*p) { + *p++ = '\0'; + } + } + free(reply[count].resp); + break; + } + reply[count].resp = NULL; + } + } + free(reply); + reply = NULL; + + return PAM_CONV_ERR; +} +#endif + +int +authenticate_user(const char *user, const char *passwd) +{ +#ifndef HAVE_PAM + gboolean pass = TRUE; +#else + int rc = 0; + gboolean pass = FALSE; + const void *p_user = NULL; + + struct pam_conv p_conv; + struct pam_handle *pam_h = NULL; + static const char *pam_name = NULL; + + if (pam_name == NULL) { + pam_name = getenv("CIB_pam_service"); + } + if (pam_name == NULL) { + pam_name = "login"; + } + + p_conv.conv = construct_pam_passwd; + p_conv.appdata_ptr = strdup(passwd); + + rc = pam_start(pam_name, user, &p_conv, &pam_h); + if (rc != PAM_SUCCESS) { + crm_err("Could not initialize PAM: %s (%d)", pam_strerror(pam_h, rc), rc); + goto bail; + } + + rc = pam_authenticate(pam_h, 0); + if (rc != PAM_SUCCESS) { + crm_err("Authentication failed for %s: %s (%d)", user, pam_strerror(pam_h, rc), rc); + goto bail; + } + + /* Make sure we authenticated the user we wanted to authenticate. + * Since we also run as non-root, it might be worth pre-checking + * the user has the same EID as us, since that is the only user we + * can authenticate. + */ + rc = pam_get_item(pam_h, PAM_USER, &p_user); + if (rc != PAM_SUCCESS) { + crm_err("Internal PAM error: %s (%d)", pam_strerror(pam_h, rc), rc); + goto bail; + + } else if (p_user == NULL) { + crm_err("Unknown user authenticated."); + goto bail; + + } else if (!pcmk__str_eq(p_user, user, pcmk__str_casei)) { + crm_err("User mismatch: %s vs. %s.", (const char *)p_user, (const char *)user); + goto bail; + } + + rc = pam_acct_mgmt(pam_h, 0); + if (rc != PAM_SUCCESS) { + crm_err("Access denied: %s (%d)", pam_strerror(pam_h, rc), rc); + goto bail; + } + pass = TRUE; + + bail: + pam_end(pam_h, rc); +#endif + return pass; +} diff --git a/daemons/based/cib.pam b/daemons/based/cib.pam new file mode 100644 index 0000000..5d0f655 --- /dev/null +++ b/daemons/based/cib.pam @@ -0,0 +1,6 @@ +# login: auth account password session +# may require permission to read /etc/shadow +auth include common-auth +account include common-account +password include common-password +session include common-session diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c new file mode 100644 index 0000000..129997e --- /dev/null +++ b/daemons/based/pacemaker-based.c @@ -0,0 +1,442 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY.
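An aside for readers new to PAM: the conversation-callback flow that construct_pam_passwd() and authenticate_user() implement above is easier to see in isolation. The following is a minimal, self-contained sketch of the same pattern, not daemon code; the demo_* names and the fixed "login" service are illustrative placeholders.

    #include <stdlib.h>
    #include <string.h>
    #include <security/pam_appl.h>

    /* Conversation callback: answer any echo-off (password) prompt with the
     * string passed via appdata_ptr. PAM takes ownership of the replies.
     */
    static int
    demo_conv(int num_msg, const struct pam_message **msg,
              struct pam_response **resp, void *appdata_ptr)
    {
        struct pam_response *replies = calloc(num_msg, sizeof(*replies));

        if (replies == NULL) {
            return PAM_CONV_ERR;
        }
        for (int i = 0; i < num_msg; i++) {
            if (msg[i]->msg_style == PAM_PROMPT_ECHO_OFF) {
                replies[i].resp = strdup((const char *) appdata_ptr);
            }
        }
        *resp = replies;
        return PAM_SUCCESS;
    }

    /* Authenticate and run account checks; returns 0 on success */
    static int
    demo_authenticate(const char *user, const char *password)
    {
        pam_handle_t *pamh = NULL;
        struct pam_conv conv = { demo_conv, (void *) password };
        int rc = pam_start("login", user, &conv, &pamh);

        if (rc == PAM_SUCCESS) {
            rc = pam_authenticate(pamh, 0);   /* drives demo_conv() */
        }
        if (rc == PAM_SUCCESS) {
            rc = pam_acct_mgmt(pamh, 0);      /* expiry, access policy */
        }
        pam_end(pamh, rc);
        return (rc == PAM_SUCCESS)? 0 : 1;
    }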
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SUMMARY "daemon for managing the configuration of a Pacemaker cluster" + +extern int init_remote_listener(int port, gboolean encrypted); +gboolean cib_shutdown_flag = FALSE; +int cib_status = pcmk_ok; + +crm_cluster_t *crm_cluster = NULL; + +GMainLoop *mainloop = NULL; +gchar *cib_root = NULL; +static gboolean preserve_status = FALSE; + +gboolean cib_writes_enabled = TRUE; + +int remote_fd = 0; +int remote_tls_fd = 0; + +GHashTable *config_hash = NULL; +GHashTable *local_notify_queue = NULL; + +pcmk__output_t *logger_out = NULL; + +static void cib_init(void); +void cib_shutdown(int nsig); +static bool startCib(const char *filename); +extern int write_cib_contents(gpointer p); + +static crm_exit_t exit_code = CRM_EX_OK; + +static void +cib_enable_writes(int nsig) +{ + crm_info("(Re)enabling disk writes"); + cib_writes_enabled = TRUE; +} + +/*! + * \internal + * \brief Set up options, users, and groups for stand-alone mode + * + * \param[out] error GLib error object + * + * \return Standard Pacemaker return code + */ +static int +setup_stand_alone(GError **error) +{ + int rc = 0; + struct passwd *pwentry = NULL; + + preserve_status = TRUE; + cib_writes_enabled = FALSE; + + errno = 0; + pwentry = getpwnam(CRM_DAEMON_USER); + if (pwentry == NULL) { + exit_code = CRM_EX_FATAL; + if (errno != 0) { + g_set_error(error, PCMK__EXITC_ERROR, exit_code, + "Error getting password DB entry for %s: %s", + CRM_DAEMON_USER, strerror(errno)); + return errno; + } + g_set_error(error, PCMK__EXITC_ERROR, exit_code, + "Password DB entry for '%s' not found", CRM_DAEMON_USER); + return ENXIO; + } + + rc = setgid(pwentry->pw_gid); + if (rc < 0) { + exit_code = CRM_EX_FATAL; + g_set_error(error, PCMK__EXITC_ERROR, exit_code, + "Could not set group to %d: %s", + pwentry->pw_gid, strerror(errno)); + return errno; + } + + rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid); + if (rc < 0) { + exit_code = CRM_EX_FATAL; + g_set_error(error, PCMK__EXITC_ERROR, exit_code, + "Could not setup groups for user %d: %s", + pwentry->pw_uid, strerror(errno)); + return errno; + } + + rc = setuid(pwentry->pw_uid); + if (rc < 0) { + exit_code = CRM_EX_FATAL; + g_set_error(error, PCMK__EXITC_ERROR, exit_code, + "Could not set user to %d: %s", + pwentry->pw_uid, strerror(errno)); + return errno; + } + return pcmk_rc_ok; +} + +static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, + + { "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, + &cib_writes_enabled, + "(Advanced use only) Enable disk writes (enabled by default unless in " + "stand-alone mode)", NULL }, + + { "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root, + "(Advanced use only) Directory where the CIB XML file should be located " + "(default: " CRM_CONFIG_DIR ")", NULL }, + + { NULL } +}; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) +{ + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, + "[metadata]"); + pcmk__add_main_args(context, entries); + return context; +} + +int 
+main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_ipc_t *old_instance = NULL; + + pcmk__output_t *out = NULL; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, "r"); + GOptionContext *context = build_arg_context(args, &output_group); + + crm_log_preinit(NULL, argc, argv); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + rc = pcmk__log_output_new(&logger_out); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format log: %s", pcmk_rc_str(rc)); + goto done; + } + pcmk__output_set_log_level(logger_out, LOG_TRACE); + + mainloop_add_signal(SIGTERM, cib_shutdown); + mainloop_add_signal(SIGPIPE, cib_enable_writes); + + cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { + cib_metadata(); + goto done; + } + + pcmk__cli_init_logging("pacemaker-based", args->verbosity); + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker CIB manager"); + + old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0); + if (old_instance == NULL) { + /* crm_ipc_new() will have already logged an error message with + * crm_err() + */ + exit_code = CRM_EX_FATAL; + goto done; + } + + if (crm_ipc_connect(old_instance)) { + /* IPC end-point already up */ + crm_ipc_close(old_instance); + crm_ipc_destroy(old_instance); + crm_err("pacemaker-based is already active, aborting startup"); + goto done; + } else { + /* not up or not authentic, we'll proceed either way */ + crm_ipc_destroy(old_instance); + old_instance = NULL; + } + + if (stand_alone) { + rc = setup_stand_alone(&error); + if (rc != pcmk_rc_ok) { + goto done; + } + } + + if (cib_root == NULL) { + cib_root = g_strdup(CRM_CONFIG_DIR); + } else { + crm_notice("Using custom config location: %s", cib_root); + } + + if (!pcmk__daemon_can_write(cib_root, NULL)) { + exit_code = CRM_EX_FATAL; + crm_err("Terminating due to bad permissions on %s", cib_root); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Bad permissions on %s (see logs for details)", cib_root); + goto done; + } + + crm_peer_init(); + + // Read initial CIB, connect to cluster, and start IPC servers + cib_init(); + + // Run the main loop + mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker CIB manager successfully started and accepting connections"); + g_main_loop_run(mainloop); + + /* If main loop returned, clean up and exit. We disconnect in case + * terminate_cib() was called with fast=-1. 
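The old_instance probe above is the classic single-instance check: try to connect to the daemon's own IPC endpoint and abort startup if anyone answers. For readers unfamiliar with the libqb-based crm_ipc_* API, here is the same idea sketched with a plain UNIX-domain socket; the socket path and helper name are hypothetical stand-ins, not how Pacemaker actually probes.

    #include <stdbool.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <unistd.h>

    /* Return true if something is accepting connections at sock_path,
     * i.e. another instance is probably already running.
     */
    static bool
    instance_already_running(const char *sock_path)
    {
        struct sockaddr_un addr = { .sun_family = AF_UNIX };
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);
        bool running = false;

        if (fd < 0) {
            return false;   /* cannot tell; assume not running */
        }
        strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
        if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == 0) {
            running = true; /* endpoint is live: someone answered */
        }
        close(fd);
        return running;
    }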
+ */ + crm_cluster_disconnect(crm_cluster); + pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + crm_peer_destroy(); + + if (local_notify_queue != NULL) { + g_hash_table_destroy(local_notify_queue); + } + + if (config_hash != NULL) { + g_hash_table_destroy(config_hash); + } + pcmk__client_cleanup(); + pcmk_cluster_free(crm_cluster); + g_free(cib_root); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(exit_code); +} + +#if SUPPORT_COROSYNC +static void +cib_cs_dispatch(cpg_handle_t handle, + const struct cpg_name *groupName, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + uint32_t kind = 0; + xmlNode *xml = NULL; + const char *from = NULL; + char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); + + if(data == NULL) { + return; + } + if (kind == crm_class_cluster) { + xml = string2xml(data); + if (xml == NULL) { + crm_err("Invalid XML: '%.120s'", data); + free(data); + return; + } + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ + cib_peer_callback(xml, NULL); + } + + free_xml(xml); + free(data); +} + +static void +cib_cs_destroy(gpointer user_data) +{ + if (cib_shutdown_flag) { + crm_info("Corosync disconnection complete"); + } else { + crm_crit("Lost connection to cluster layer, shutting down"); + terminate_cib(__func__, CRM_EX_DISCONNECT); + } +} +#endif + +static void +cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) +{ + switch (type) { + case crm_status_processes: + if (cib_legacy_mode() + && !pcmk_is_set(node->processes, crm_get_cluster_proc())) { + + uint32_t old = data? *(const uint32_t *)data : 0; + + if ((node->processes ^ old) & crm_proc_cpg) { + crm_info("Attempting to disable legacy mode after %s left the cluster", + node->uname); + legacy_mode = FALSE; + } + } + break; + + case crm_status_uname: + case crm_status_nstate: + if (cib_shutdown_flag && (crm_active_peers() < 2) + && (pcmk__ipc_client_count() == 0)) { + + crm_info("No more peers"); + terminate_cib(__func__, -1); + } + break; + } +} + +static void +cib_init(void) +{ + crm_cluster = pcmk_cluster_new(); + +#if SUPPORT_COROSYNC + if (is_corosync_cluster()) { + crm_cluster->destroy = cib_cs_destroy; + crm_cluster->cpg.cpg_deliver_fn = cib_cs_dispatch; + crm_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; + } +#endif // SUPPORT_COROSYNC + + config_hash = pcmk__strkey_table(free, free); + + if (startCib("cib.xml") == FALSE) { + crm_crit("Cannot start CIB... terminating"); + crm_exit(CRM_EX_NOINPUT); + } + + if (!stand_alone) { + crm_set_status_callback(&cib_peer_update_callback); + + if (!crm_cluster_connect(crm_cluster)) { + crm_crit("Cannot sign in to the cluster... 
terminating"); + crm_exit(CRM_EX_FATAL); + } + } + + pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, + &ipc_rw_callbacks); + + if (stand_alone) { + based_is_primary = true; + } +} + +static bool +startCib(const char *filename) +{ + gboolean active = FALSE; + xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); + + if (activateCibXml(cib, TRUE, "start") == 0) { + int port = 0; + + active = TRUE; + + cib_read_config(config_hash, cib); + + pcmk__scan_port(crm_element_value(cib, "remote-tls-port"), &port); + if (port >= 0) { + remote_tls_fd = init_remote_listener(port, TRUE); + } + + pcmk__scan_port(crm_element_value(cib, "remote-clear-port"), &port); + if (port >= 0) { + remote_fd = init_remote_listener(port, FALSE); + } + } + return active; +} diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h new file mode 100644 index 0000000..05e49b3 --- /dev/null +++ b/daemons/based/pacemaker-based.h @@ -0,0 +1,150 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef PACEMAKER_BASED__H +# define PACEMAKER_BASED__H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_GNUTLS_GNUTLS_H +# include +#endif + +// CIB-specific client flags +enum cib_client_flags { + // Notifications + cib_notify_pre = (UINT64_C(1) << 0), + cib_notify_post = (UINT64_C(1) << 1), + cib_notify_replace = (UINT64_C(1) << 2), + cib_notify_confirm = (UINT64_C(1) << 3), + cib_notify_diff = (UINT64_C(1) << 4), + + // Whether client is another cluster daemon + cib_is_daemon = (UINT64_C(1) << 12), +}; + +typedef struct cib_operation_s { + const char *operation; + gboolean modifies_cib; + gboolean needs_privileges; + int (*prepare) (xmlNode *, xmlNode **, const char **); + int (*cleanup) (int, xmlNode **, xmlNode **); + int (*fn) (const char *, int, const char *, xmlNode *, + xmlNode *, xmlNode *, xmlNode **, xmlNode **); +} cib_operation_t; + +extern bool based_is_primary; +extern GHashTable *config_hash; +extern xmlNode *the_cib; +extern crm_trigger_t *cib_writer; +extern gboolean cib_writes_enabled; + +extern GMainLoop *mainloop; +extern crm_cluster_t *crm_cluster; +extern GHashTable *local_notify_queue; +extern gboolean legacy_mode; +extern gboolean stand_alone; +extern gboolean cib_shutdown_flag; +extern gchar *cib_root; +extern int cib_status; +extern pcmk__output_t *logger_out; + +extern struct qb_ipcs_service_handlers ipc_ro_callbacks; +extern struct qb_ipcs_service_handlers ipc_rw_callbacks; +extern qb_ipcs_service_t *ipcs_ro; +extern qb_ipcs_service_t *ipcs_rw; +extern qb_ipcs_service_t *ipcs_shm; + +void cib_peer_callback(xmlNode *msg, void *private_data); +void cib_common_callback_worker(uint32_t id, uint32_t flags, + xmlNode *op_request, pcmk__client_t *cib_client, + gboolean privileged); +void cib_shutdown(int nsig); +void terminate_cib(const char *caller, int fast); +gboolean cib_legacy_mode(void); + +gboolean uninitializeCib(void); +xmlNode *readCibXmlFile(const char *dir, const char *file, + gboolean discard_status); +int activateCibXml(xmlNode *doc, gboolean to_disk, const char *op); + +int cib_process_shutdown_req(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, + xmlNode 
*existing_cib, xmlNode **result_cib, + xmlNode **answer); +int cib_process_default(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_ping(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_readwrite(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_replace_svr(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_server_process_diff(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_sync(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_sync_one(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_delete_absolute(const char *op, int options, + const char *section, xmlNode *req, + xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); +int cib_process_upgrade_server(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, + xmlNode *existing_cib, xmlNode **result_cib, + xmlNode **answer); +void send_sync_request(const char *host); +int sync_our_cib(xmlNode *request, gboolean all); + +xmlNode *cib_msg_copy(xmlNode *msg, gboolean with_data); +int cib_get_operation_id(const char *op, int *operation); +cib_op_t *cib_op_func(int call_type); +gboolean cib_op_modifies(int call_type); +int cib_op_prepare(int call_type, xmlNode *request, xmlNode **input, + const char **section); +int cib_op_cleanup(int call_type, int options, xmlNode **input, + xmlNode **output); +int cib_op_can_run(int call_type, int call_options, bool privileged); +void cib_diff_notify(const char *op, int result, const char *call_id, + const char *client_id, const char *client_name, + const char *origin, xmlNode *update, xmlNode *diff); +void cib_replace_notify(const char *op, int result, const char *call_id, + const char *client_id, const char *client_name, + const char *origin, xmlNode *update, xmlNode *diff, + uint32_t change_section); + +static inline const char * +cib_config_lookup(const char *opt) +{ + return g_hash_table_lookup(config_hash, opt); +} + +#endif // PACEMAKER_BASED__H diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am new file mode 100644 index 0000000..08be1ff --- /dev/null +++ b/daemons/controld/Makefile.am @@ -0,0 +1,87 @@ +# +# Copyright 2018-2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
+# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +halibdir = $(CRM_DAEMON_DIR) + +halib_PROGRAMS = pacemaker-controld + +noinst_HEADERS = controld_alerts.h \ + controld_callbacks.h \ + controld_cib.h \ + controld_fencing.h \ + controld_fsa.h \ + controld_globals.h \ + controld_lrm.h \ + controld_membership.h \ + controld_messages.h \ + controld_metadata.h \ + controld_throttle.h \ + controld_timers.h \ + controld_transition.h \ + controld_utils.h \ + pacemaker-controld.h + +pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \ + $(top_builddir)/lib/pacemaker/libpacemaker.la \ + $(top_builddir)/lib/pengine/libpe_rules.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(CLUSTERLIBS) + +pacemaker_controld_SOURCES = pacemaker-controld.c \ + controld_alerts.c \ + controld_attrd.c \ + controld_callbacks.c \ + controld_cib.c \ + controld_control.c \ + controld_corosync.c \ + controld_election.c \ + controld_execd.c \ + controld_execd_state.c \ + controld_fencing.c \ + controld_fsa.c \ + controld_join_client.c \ + controld_join_dc.c \ + controld_matrix.c \ + controld_membership.c \ + controld_messages.c \ + controld_metadata.c \ + controld_remote_ra.c \ + controld_schedulerd.c \ + controld_te_actions.c \ + controld_te_callbacks.c \ + controld_te_events.c \ + controld_te_utils.c \ + controld_throttle.c \ + controld_timers.c \ + controld_transition.c \ + controld_utils.c + +if BUILD_XML_HELP +man7_MANS = pacemaker-controld.7 +endif + +CLEANFILES = $(man7_MANS) + +if BUILD_LEGACY_LINKS +install-exec-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd && $(LN_S) pacemaker-controld crmd + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd +endif diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c new file mode 100644 index 0000000..27a5ce2 --- /dev/null +++ b/daemons/controld/controld_alerts.c @@ -0,0 +1,88 @@ +/* + * Copyright 2012-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +static GList *crmd_alert_list = NULL; + +void +crmd_unpack_alerts(xmlNode *alerts) +{ + pe_free_alert_list(crmd_alert_list); + crmd_alert_list = pe_unpack_alerts(alerts); +} + +void +crmd_alert_node_event(crm_node_t *node) +{ + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, + node->uname, node->id, node->state); +} + +void +crmd_alert_fencing_op(stonith_event_t * e) +{ + char *desc; + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + desc = stonith__event_description(e); + lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, + e->target, e->operation, desc, e->result); + free(desc); +} + +void +crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) +{ + lrm_state_t *lrm_state; + + if (crmd_alert_list == NULL) { + return; + } + + lrm_state = lrm_state_find(controld_globals.our_nodename); + if (lrm_state == NULL) { + return; + } + + lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node, + op); +} diff --git a/daemons/controld/controld_alerts.h b/daemons/controld/controld_alerts.h new file mode 100644 index 0000000..ec5852a --- /dev/null +++ b/daemons/controld/controld_alerts.h @@ -0,0 +1,22 @@ +/* + * Copyright 2015-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_ALERTS__H +# define CONTROLD_ALERTS__H + +# include +# include +# include + +void crmd_unpack_alerts(xmlNode *alerts); +void crmd_alert_node_event(crm_node_t *node); +void crmd_alert_fencing_op(stonith_event_t *e); +void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op); + +#endif diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c new file mode 100644 index 0000000..923abb9 --- /dev/null +++ b/daemons/controld/controld_attrd.c @@ -0,0 +1,160 @@ +/* + * Copyright 2006-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +static pcmk_ipc_api_t *attrd_api = NULL; + +void +controld_close_attrd_ipc(void) +{ + if (attrd_api != NULL) { + crm_trace("Closing connection to pacemaker-attrd"); + pcmk_disconnect_ipc(attrd_api); + pcmk_free_ipc_api(attrd_api); + attrd_api = NULL; + } +} + +static inline const char * +node_type(bool is_remote) +{ + return is_remote? "Pacemaker Remote" : "cluster"; +} + +static inline const char * +when(void) +{ + return pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN)? " at shutdown" : ""; +} + +static void +handle_attr_error(void) +{ + if (AM_I_DC) { + /* We are unable to provide accurate information to the + * scheduler, so allow another node to take over DC. + * @TODO Should we do this unconditionally on any failure? 
+ */ + crmd_exit(CRM_EX_FATAL); + + } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + // Fast-track shutdown since unable to request via attribute + register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL); + } +} + +void +update_attrd(const char *host, const char *name, const char *value, + const char *user_name, gboolean is_remote_node) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + uint32_t attrd_opts = pcmk__node_attr_value; + + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } + rc = pcmk__attrd_api_update(attrd_api, host, name, value, + NULL, NULL, user_name, attrd_opts); + } + if (rc != pcmk_rc_ok) { + do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, + "Could not update attribute %s=%s for %s node %s%s: %s " + CRM_XS " rc=%d", name, value, node_type(is_remote_node), + host, when(), pcmk_rc_str(rc), rc); + handle_attr_error(); + } +} + +void +update_attrd_list(GList *attrs, uint32_t opts) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL, + opts | pcmk__node_attr_value); + } + if (rc != pcmk_rc_ok) { + do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, + "Could not update multiple node attributes: %s " + CRM_XS " rc=%d", pcmk_rc_str(rc), rc); + handle_attr_error(); + } +} + +void +update_attrd_remote_node_removed(const char *host, const char *user_name) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", + host); + rc = pcmk__attrd_api_purge(attrd_api, host); + } + if (rc != pcmk_rc_ok) { + crm_err("Could not purge Pacemaker Remote node %s " + "in attribute manager%s: %s " CRM_XS " rc=%d", + host, when(), pcmk_rc_str(rc), rc); + } +} + +void +update_attrd_clear_failures(const char *host, const char *rsc, const char *op, + const char *interval_spec, gboolean is_remote_node) +{ + int rc = pcmk_rc_ok; + + if (attrd_api == NULL) { + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { + const char *op_desc = pcmk__s(op, "operations"); + const char *interval_desc = "all"; + uint32_t attrd_opts = pcmk__node_attr_none; + + if (op != NULL) { + interval_desc = pcmk__s(interval_spec, "nonrecurring"); + } + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } + crm_info("Asking attribute manager to clear failure of %s %s for %s " + "on %s node %s", interval_desc, op_desc, rsc, + node_type(is_remote_node), host); + rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op, + interval_spec, NULL, attrd_opts); + } + if (rc != pcmk_rc_ok) { + crm_err("Could not clear failure attributes for %s on %s node %s%s: %s " + CRM_XS " rc=%d", pcmk__s(rsc, "all resources"), + node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); + } +} diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c new file mode 100644 index 0000000..d578adc --- /dev/null +++ b/daemons/controld/controld_callbacks.c @@ -0,0 +1,367 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
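Each of the update_attrd* wrappers that follow repeats one lazy-connection idiom: create the attrd IPC object on first use, cache it in the file-scope static, and handle failure in one place. Factored out, the shared skeleton would look roughly like the sketch below; attrd_call() is a hypothetical helper for illustration only, and the handle_attr_error() escalation actually applies just to the two update variants.

    /* Hypothetical refactoring sketch, not part of the patch: lazily
     * connect, run one request, report failure centrally.
     */
    static int
    attrd_call(int (*request)(pcmk_ipc_api_t *api))
    {
        int rc = pcmk_rc_ok;

        if (attrd_api == NULL) {          /* first use: connect to attrd */
            rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
        }
        if (rc == pcmk_rc_ok) {
            rc = request(attrd_api);      /* the actual update/purge/clear */
        }
        if (rc != pcmk_rc_ok) {
            handle_attr_error();          /* DC exits; a node that is
                                           * shutting down fast-tracks */
        }
        return rc;
    }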
+ * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* From join_dc... */ +extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +void +crmd_ha_msg_filter(xmlNode * msg) +{ + if (AM_I_DC) { + const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); + + if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) { + const char *from = crm_element_value(msg, F_ORIG); + + if (!pcmk__str_eq(from, controld_globals.our_nodename, + pcmk__str_casei)) { + int level = LOG_INFO; + const char *op = crm_element_value(msg, F_CRM_TASK); + + /* make sure the election happens NOW */ + if (controld_globals.fsa_state != S_ELECTION) { + ha_msg_input_t new_input; + + level = LOG_WARNING; + new_input.msg = msg; + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, + __func__); + } + + do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); + goto done; + } + } + + } else { + const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); + + if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) { + return; + } + } + + /* crm_log_xml_trace(msg, "HA[inbound]"); */ + route_message(C_HA_MESSAGE, msg); + + done: + controld_trigger_fsa(); +} + +/*! + * \internal + * \brief Check whether a node is online + * + * \param[in] node Node to check + * + * \retval -1 if completely dead + * \retval 0 if partially alive + * \retval 1 if completely alive + */ +static int +node_alive(const crm_node_t *node) +{ + if (pcmk_is_set(node->flags, crm_remote_node)) { + // Pacemaker Remote nodes can't be partially alive + return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1; + + } else if (crm_is_peer_active(node)) { + // Completely up cluster node: both cluster member and peer + return 1; + + } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc()) + && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { + // Completely down cluster node: neither cluster member nor peer + return -1; + } + + // Partially up cluster node: only cluster member or only peer + return 0; +} + +#define state_text(state) ((state)? (const char *)(state) : "in unknown state") + +void +peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) +{ + uint32_t old = 0; + bool appeared = FALSE; + bool is_remote = pcmk_is_set(node->flags, crm_remote_node); + + /* The controller waits to receive some information from the membership + * layer before declaring itself operational. If this is being called for a + * cluster node, indicate that we have it. 
+ */ + if (!is_remote) { + controld_set_fsa_input_flags(R_PEER_DATA); + } + + if (type == crm_status_processes + && pcmk_is_set(node->processes, crm_get_cluster_proc()) + && !AM_I_DC + && !is_remote) { + /* + * This is a hack until we can send to a nodeid and/or we fix node name lookups + * These messages are ignored in crmd_ha_msg_filter() + */ + xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + crm_debug("Sending hello to node %u so that it learns our node name", node->id); + send_cluster_message(node, crm_msg_crmd, query, FALSE); + + free_xml(query); + } + + if (node->uname == NULL) { + return; + } + + switch (type) { + case crm_status_uname: + /* If we've never seen the node, then it also won't be in the status section */ + crm_info("%s node %s is now %s", + (is_remote? "Remote" : "Cluster"), + node->uname, state_text(node->state)); + return; + + case crm_status_nstate: + /* This callback should not be called unless the state actually + * changed, but here's a failsafe just in case. + */ + CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei), + return); + + crm_info("%s node %s is now %s (was %s)", + (is_remote? "Remote" : "Cluster"), + node->uname, state_text(node->state), state_text(data)); + + if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) { + appeared = TRUE; + if (!is_remote) { + remove_stonith_cleanup(node->uname); + } + } else { + controld_remove_failed_sync_node(node->uname); + controld_remove_voter(node->uname); + } + + crmd_alert_node_event(node); + break; + + case crm_status_processes: + CRM_CHECK(data != NULL, return); + old = *(const uint32_t *)data; + appeared = pcmk_is_set(node->processes, crm_get_cluster_proc()); + + { + const char *dc_s = controld_globals.dc_name; + + if ((dc_s == NULL) && AM_I_DC) { + dc_s = "true"; + } + + crm_info("Node %s is %s a peer " CRM_XS + " DC=%s old=%#07x new=%#07x", + node->uname, (appeared? "now" : "no longer"), + pcmk__s(dc_s, ""), old, node->processes); + } + + if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) { + /* Peer status did not change. This should not be possible, + * since we don't track process flags other than peer status. + */ + crm_trace("Process flag %#7x did not change from %#7x to %#7x", + crm_get_cluster_proc(), old, node->processes); + return; + + } + + if (!appeared) { + node->peer_lost = time(NULL); + controld_remove_failed_sync_node(node->uname); + controld_remove_voter(node->uname); + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, + R_CIB_CONNECTED)) { + crm_trace("Ignoring peer status change because not connected to CIB"); + return; + + } else if (controld_globals.fsa_state == S_STOPPING) { + crm_trace("Ignoring peer status change because stopping"); + return; + } + + if (!appeared + && pcmk__str_eq(node->uname, controld_globals.our_nodename, + pcmk__str_casei)) { + /* Did we get evicted? */ + crm_notice("Our peer connection failed"); + register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); + + } else if (pcmk__str_eq(node->uname, controld_globals.dc_name, + pcmk__str_casei) + && !crm_is_peer_active(node)) { + /* Did the DC leave us? */ + crm_notice("Our peer on the DC (%s) is dead", + controld_globals.dc_name); + register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); + + /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't + * want to fence it. Newer DCs will send their shutdown request + * to all peers, who will update the DC's expected state to + * down, thus avoiding fencing. 
We can safely erase the DC's + * transient attributes when it leaves in that case. However, + * the only way to avoid fencing older DCs is to leave the + * transient attributes intact until it rejoins. + */ + if (compare_version(controld_globals.dc_version, "3.0.9") > 0) { + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); + } + + } else if (AM_I_DC + || pcmk_is_set(controld_globals.flags, controld_dc_left) + || (controld_globals.dc_name == NULL)) { + /* This only needs to be done once, so normally the DC should do + * it. However if there is no DC, every node must do it, since + * there is no other way to ensure some one node does it. + */ + if (appeared) { + te_trigger_stonith_history_sync(FALSE); + } else { + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); + } + } + break; + } + + if (AM_I_DC) { + xmlNode *update = NULL; + int flags = node_update_peer; + int alive = node_alive(node); + pcmk__graph_action_t *down = match_down_event(node->uuid); + + crm_trace("Alive=%d, appeared=%d, down=%d", + alive, appeared, (down? down->id : -1)); + + if (appeared && (alive > 0) && !is_remote) { + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + } + + if (down) { + const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); + + if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + + /* tengine_stonith_callback() confirms fence actions */ + crm_trace("Updating CIB %s fencer reported fencing of %s complete", + (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname); + + } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { + + // Shutdown actions are immediately confirmed (i.e. no_wait) + if (!is_remote) { + flags |= node_update_join | node_update_expected; + crmd_peer_down(node, FALSE); + check_join_state(controld_globals.fsa_state, __func__); + } + if (alive >= 0) { + crm_info("%s of peer %s is in progress " CRM_XS " action=%d", + task, node->uname, down->id); + } else { + crm_notice("%s of peer %s is complete " CRM_XS " action=%d", + task, node->uname, down->id); + pcmk__update_graph(controld_globals.transition_graph, down); + trigger_graph(); + } + + } else { + crm_trace("Node %s is %s, was expected to %s (op %d)", + node->uname, + ((alive > 0)? "alive" : + ((alive < 0)? "dead" : "partially alive")), + task, down->id); + } + + } else if (appeared == FALSE) { + if ((controld_globals.transition_graph == NULL) + || (controld_globals.transition_graph->id == -1)) { + crm_info("Stonith/shutdown of node %s is unknown to the " + "current DC", node->uname); + } else { + crm_warn("Stonith/shutdown of node %s was not expected", + node->uname); + } + if (!is_remote) { + crm_update_peer_join(__func__, node, crm_join_none); + check_join_state(controld_globals.fsa_state, __func__); + } + abort_transition(INFINITY, pcmk__graph_restart, "Node failure", + NULL); + fail_incompletable_actions(controld_globals.transition_graph, + node->uuid); + + } else { + crm_trace("Node %s came up, was not expected to be down", + node->uname); + } + + if (is_remote) { + /* A pacemaker_remote node won't have its cluster status updated + * in the CIB by membership-layer callbacks, so do it here. 
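One note on the version gate a few lines up: compare_version() orders dotted feature-set strings numerically per segment, so "3.0.10" ranks above "3.0.9" even though plain string comparison would say the opposite. A two-line illustration of the assumed semantics (return-value sign only; compare_version() is declared in Pacemaker's public headers, to the best of our reading in crm/common/util.h):

    /* Numeric per-segment ordering, unlike strcmp() */
    compare_version("3.0.9", "3.0.10");   /* < 0: segment 9 is less than 10 */
    strcmp("3.0.9", "3.0.10");            /* > 0: char '9' sorts after '1'  */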
+ */ + flags |= node_update_cluster; + + /* Trigger resource placement on newly integrated nodes */ + if (appeared) { + abort_transition(INFINITY, pcmk__graph_restart, + "Pacemaker Remote node integrated", NULL); + } + } + + /* Update the CIB node state */ + update = create_node_state_update(node, flags, NULL, __func__); + if (update == NULL) { + crm_debug("Node state update not yet possible for %s", node->uname); + } else { + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); + } + free_xml(update); + } + + controld_trigger_fsa(); +} + +gboolean +crm_fsa_trigger(gpointer user_data) +{ + crm_trace("Invoked (queue len: %d)", + g_list_length(controld_globals.fsa_message_queue)); + s_crmd_fsa(C_FSA_INTERNAL); + crm_trace("Exited (queue len: %d)", + g_list_length(controld_globals.fsa_message_queue)); + return TRUE; +} diff --git a/daemons/controld/controld_callbacks.h b/daemons/controld/controld_callbacks.h new file mode 100644 index 0000000..a69d515 --- /dev/null +++ b/daemons/controld/controld_callbacks.h @@ -0,0 +1,21 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_CALLBACKS__H +# define CONTROLD_CALLBACKS__H + +#include + +extern void crmd_ha_msg_filter(xmlNode * msg); + +extern gboolean crm_fsa_trigger(gpointer user_data); + +extern void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data); + +#endif diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c new file mode 100644 index 0000000..94b99dd --- /dev/null +++ b/daemons/controld/controld_cib.c @@ -0,0 +1,1138 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include /* sleep */ + +#include +#include +#include +#include +#include + +#include + +// Call ID of the most recent in-progress CIB resource update (or 0 if none) +static int pending_rsc_update = 0; + +// Call IDs of requested CIB replacements that won't trigger a new election +// (used as a set of gint values) +static GHashTable *cib_replacements = NULL; + +/*! + * \internal + * \brief Store the call ID of a CIB replacement that the controller requested + * + * The \p do_cib_replaced() callback function will avoid triggering a new + * election when we're notified of one of these expected replacements. + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \note This function should be called after making any asynchronous CIB + * request (or before making any synchronous CIB request) that may replace + * part of the nodes or status section. This may include CIB sync calls. + */ +void +controld_record_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return); + + if (cib_replacements == NULL) { + cib_replacements = g_hash_table_new(NULL, NULL); + } + + /* If the call ID is already present in the table, then it's old. We may not + * be removing them properly, and we could improperly ignore replacement + * notifications if cib_t:call_id wraps around. + */ + CRM_LOG_ASSERT(g_hash_table_add(cib_replacements, + GINT_TO_POINTER((gint) call_id))); +} + +/*! 
+ * \internal + * \brief Remove the call ID of a CIB replacement from the replacements table + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \return \p true if \p call_id was found in the table, or \p false otherwise + * + * \note CIB notifications run before CIB callbacks. If this function is called + * from within a callback, \p do_cib_replaced() will have removed + * \p call_id from the table first if relevant changes triggered a + * notification. + */ +bool +controld_forget_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return false); + + if (cib_replacements == NULL) { + return false; + } + return g_hash_table_remove(cib_replacements, + GINT_TO_POINTER((gint) call_id)); +} + +/*! + * \internal + * \brief Empty the hash table containing call IDs of CIB replacement requests + */ +void +controld_forget_all_cib_replace_calls(void) +{ + if (cib_replacements != NULL) { + g_hash_table_remove_all(cib_replacements); + } +} + +/*! + * \internal + * \brief Free the hash table containing call IDs of CIB replacement requests + */ +void +controld_destroy_cib_replacements_table(void) +{ + if (cib_replacements != NULL) { + g_hash_table_destroy(cib_replacements); + cib_replacements = NULL; + } +} + +/*! + * \internal + * \brief Respond to a dropped CIB connection + * + * \param[in] user_data CIB connection that dropped + */ +static void +handle_cib_disconnect(gpointer user_data) +{ + CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); + + controld_trigger_fsa(); + controld_globals.cib_conn->state = cib_disconnected; + + if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + // @TODO This should trigger a reconnect, not a shutdown + crm_crit("Lost connection to the CIB manager, shutting down"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + } else { // Expected + crm_info("Connection to the CIB manager terminated"); + } +} + +static void +do_cib_updated(const char *event, xmlNode * msg) +{ + if (pcmk__alert_in_patchset(msg, TRUE)) { + controld_trigger_config(); + } +} + +static void +do_cib_replaced(const char *event, xmlNode * msg) +{ + int call_id = 0; + const char *client_id = crm_element_value(msg, F_CIB_CLIENTID); + uint32_t change_section = cib_change_section_nodes + |cib_change_section_status; + long long value = 0; + + crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC)); + if (!AM_I_DC) { + return; + } + + if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0) + && pcmk__str_eq(client_id, controld_globals.cib_client_id, + pcmk__str_none) + && controld_forget_cib_replace_call(call_id)) { + // We requested this replace op. No need to restart the join. 
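The cib_replacements table above leans on a standard GLib idiom worth spelling out: a GHashTable created with NULL hash and equality functions hashes the pointer values themselves, which makes it an efficient set of small integers boxed with GINT_TO_POINTER(). A standalone sketch of the idiom:

    #include <glib.h>

    int
    main(void)
    {
        /* NULL hash/equal functions => direct pointer hashing, which is
         * exactly right when the keys are integers stuffed into pointers
         */
        GHashTable *ids = g_hash_table_new(NULL, NULL);

        g_hash_table_add(ids, GINT_TO_POINTER(42));   /* record call ID 42 */

        /* Test-and-forget in one step, as controld_forget_cib_replace_call()
         * does: g_hash_table_remove() returns whether the key was present.
         */
        if (g_hash_table_remove(ids, GINT_TO_POINTER(42))) {
            /* 42 was ours; ignore the matching replace notification */
        }

        g_hash_table_destroy(ids);
        return 0;
    }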
+ return; + } + + if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0) + || (value < 0) || (value > UINT32_MAX)) { + + crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION); + } else { + change_section = (uint32_t) value; + } + + if (pcmk_any_flags_set(change_section, cib_change_section_nodes + |cib_change_section_status)) { + + /* start the join process again so we get everyone's LRM status */ + populate_cib_nodes(node_update_quick|node_update_all, __func__); + + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + } +} + +void +controld_disconnect_cib_manager(void) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + CRM_ASSERT(cib_conn != NULL); + + crm_info("Disconnecting from the CIB manager"); + + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, + do_cib_replaced); + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + do_cib_updated); + cib_free_callbacks(cib_conn); + + if (cib_conn->state != cib_disconnected) { + cib_conn->cmds->set_secondary(cib_conn, + cib_scope_local|cib_discard_reply); + cib_conn->cmds->signoff(cib_conn); + } + + crm_notice("Disconnected from the CIB manager"); +} + +/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ +void +do_cib_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + static int cib_retries = 0; + + cib_t *cib_conn = controld_globals.cib_conn; + + void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; + void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced; + void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; + + int rc = pcmk_ok; + + CRM_ASSERT(cib_conn != NULL); + + if (pcmk_is_set(action, A_CIB_STOP)) { + if ((cib_conn->state != cib_disconnected) + && (pending_rsc_update != 0)) { + + crm_info("Waiting for resource update %d to complete", + pending_rsc_update); + crmd_fsa_stall(FALSE); + return; + } + controld_disconnect_cib_manager(); + } + + if (!pcmk_is_set(action, A_CIB_START)) { + return; + } + + if (cur_state == S_STOPPING) { + crm_err("Ignoring request to connect to the CIB manager after " + "shutdown"); + return; + } + + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + + if (rc != pcmk_ok) { + // A short wait that usually avoids stalling the FSA + sleep(1); + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + } + + if (rc != pcmk_ok) { + crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); + + } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, + dnotify_fn) != pcmk_ok) { + crm_err("Could not set dnotify callback"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_REPLACE_NOTIFY, + replace_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (replace)"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_DIFF_NOTIFY, + update_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + + } else { + controld_set_fsa_input_flags(R_CIB_CONNECTED); + cib_retries = 0; + cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id, + NULL); + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + cib_retries++; + + if (cib_retries < 30) { + crm_warn("Couldn't complete CIB registration %d times... 
" + "pause and retry", cib_retries); + controld_start_wait_timer(); + crmd_fsa_stall(FALSE); + + } else { + crm_err("Could not complete CIB registration %d times... " + "hard error", cib_retries); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } + } +} + +#define MIN_CIB_OP_TIMEOUT (30) + +/*! + * \internal + * \brief Get the timeout (in seconds) that should be used with CIB operations + * + * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout + * environment variable, or 10 seconds times one more than the number of + * nodes in the cluster. + */ +unsigned int +cib_op_timeout(void) +{ + static int env_timeout = -1; + unsigned int calculated_timeout = 0; + + if (env_timeout == -1) { + const char *env = getenv("PCMK_cib_timeout"); + + pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); + crm_trace("Minimum CIB op timeout: %ds (environment: %s)", + env_timeout, (env? env : "none")); + } + + calculated_timeout = 1 + crm_active_peers(); + if (crm_remote_peer_cache) { + calculated_timeout += g_hash_table_size(crm_remote_peer_cache); + } + calculated_timeout *= 10; + + calculated_timeout = QB_MAX(calculated_timeout, env_timeout); + crm_trace("Calculated timeout: %us", calculated_timeout); + + if (controld_globals.cib_conn) { + controld_globals.cib_conn->call_timeout = calculated_timeout; + } + return calculated_timeout; +} + +/*! + * \internal + * \brief Get CIB call options to use local scope if primary is unavailable + * + * \return CIB call options + */ +int +crmd_cib_smart_opt(void) +{ + int call_opt = cib_none; + + if ((controld_globals.fsa_state == S_ELECTION) + || (controld_globals.fsa_state == S_PENDING)) { + crm_info("Sending update to local CIB in state: %s", + fsa_state2string(controld_globals.fsa_state)); + cib__set_call_options(call_opt, "update", cib_scope_local); + } + return call_opt; +} + +static void +cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + char *desc = user_data; + + if (rc == 0) { + crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); + } else { + crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", + desc, call_id, pcmk_strerror(rc), rc); + } +} + +// Searches for various portions of node_state to delete + +// Match a particular node's node_state (takes node name 1x) +#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" + +// Node's lrm section (name 1x) +#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM + +/* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x) + */ +#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ + "|" XPATH_NODE_STATE \ + "//" XML_LRM_TAG_RESOURCE \ + "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \ + "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]" + +// Node's transient_attributes section (name 1x) +#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS + +// Everything under node_state (name 1x) +#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" + +/* Unlocked history + transient attributes + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x, + * name 1x) + */ +#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS + +/*! 
+ * \internal + * \brief Delete subsection of a node's CIB node_state + * + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to delete + * \param[in] options CIB call options to use + */ +void +controld_delete_node_state(const char *uname, enum controld_section_e section, + int options) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + char *xpath = NULL; + char *desc = NULL; + + // Shutdown locks that started before this time are expired + long long expire = (long long) time(NULL) + - controld_globals.shutdown_lock_limit; + + CRM_CHECK(uname != NULL, return); + switch (section) { + case controld_section_lrm: + xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc = crm_strdup_printf("resource history for node %s", uname); + break; + case controld_section_lrm_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, + uname, uname, expire); + desc = crm_strdup_printf("resource history (other than shutdown " + "locks) for node %s", uname); + break; + case controld_section_attrs: + xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc = crm_strdup_printf("transient attributes for node %s", uname); + break; + case controld_section_all: + xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc = crm_strdup_printf("all state for node %s", uname); + break; + case controld_section_all_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, + uname, uname, expire, uname); + desc = crm_strdup_printf("all state (other than shutdown locks) " + "for node %s", uname); + break; + } + + if (cib_conn == NULL) { + crm_warn("Unable to delete %s: no CIB connection", desc); + free(desc); + } else { + int call_id; + + cib__set_call_options(options, "node state deletion", + cib_xpath|cib_multiple); + call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options); + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, call_id, xpath); + fsa_register_cib_callback(call_id, desc, cib_delete_callback); + // CIB library handles freeing desc + } + free(xpath); +} + +// Takes node name and resource ID +#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ + "[@" XML_ATTR_UNAME "='%s']/" \ + XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE \ + "[@" XML_ATTR_ID "='%s']" +// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks + +/*! 
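For orientation, assuming the usual values of the CIB name constants (node_state, uname, lrm, lrm_resources, lrm_resource, id), XPATH_RESOURCE_HISTORY for node "node1" and resource "rsc1" expands to //node_state[@uname='node1']/lrm/lrm_resources/lrm_resource[@id='rsc1'], i.e. exactly one resource's history entry under one node's state section.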
+ * \internal + * \brief Clear resource history from CIB for a given resource and node + * + * \param[in] rsc_id ID of resource to be cleared + * \param[in] node Node whose resource history should be cleared + * \param[in] user_name ACL user name to use + * \param[in] call_options CIB call options + * + * \return Standard Pacemaker return code + */ +int +controld_delete_resource_history(const char *rsc_id, const char *node, + const char *user_name, int call_options) +{ + char *desc = NULL; + char *xpath = NULL; + int rc = pcmk_rc_ok; + + CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); + + desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); + if (controld_globals.cib_conn == NULL) { + crm_err("Unable to clear %s: no CIB connection", desc); + free(desc); + return ENOTCONN; + } + + // Ask CIB to delete the entry + xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); + rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE, + NULL, xpath, NULL, NULL, call_options|cib_xpath, + user_name); + + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + crm_err("Could not delete resource status of %s on %s%s%s: %s " + CRM_XS " rc=%d", rsc_id, node, + (user_name? " for user " : ""), (user_name? user_name : ""), + pcmk_rc_str(rc), rc); + free(desc); + free(xpath); + return rc; + } + + if (pcmk_is_set(call_options, cib_sync_call)) { + if (pcmk_is_set(call_options, cib_dryrun)) { + crm_debug("Deletion of %s would succeed", desc); + } else { + crm_debug("Deletion of %s succeeded", desc); + } + free(desc); + + } else { + crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, rc, xpath); + fsa_register_cib_callback(rc, desc, cib_delete_callback); + // CIB library handles freeing desc + } + + free(xpath); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Build XML and string of parameters meeting some criteria, for digest + * + * \param[in] op Executor event with parameter table to use + * \param[in] metadata Parsed meta-data for executed resource agent + * \param[in] param_type Flag used for selection criteria + * \param[out] result Will be set to newly created XML with selected + * parameters as attributes + * + * \return Newly allocated space-separated string of parameter names + * \note Selection criteria varies by param_type: for the restart digest, we + * want parameters that are *not* marked reloadable (OCF 1.1) or that + * *are* marked unique (pre-1.1), for both string and XML results; for the + * secure digest, we want parameters that *are* marked private for the + * string, but parameters that are *not* marked private for the XML. + * \note It is the caller's responsibility to free the string return value with + * \p g_string_free() and the XML result with \p free_xml(). + */ +static GString * +build_parameter_list(const lrmd_event_data_t *op, + const struct ra_metadata_s *metadata, + enum ra_param_flags_e param_type, xmlNode **result) +{ + GString *list = NULL; + + *result = create_xml_node(NULL, XML_TAG_PARAMS); + + /* Consider all parameters only except private ones to be consistent with + * what scheduler does with calculate_secure_digest(). 
+/*!
+ * \internal
+ * \brief Build XML and string of parameters meeting some criteria, for digest
+ *
+ * \param[in]  op          Executor event with parameter table to use
+ * \param[in]  metadata    Parsed meta-data for executed resource agent
+ * \param[in]  param_type  Flag used for selection criteria
+ * \param[out] result      Will be set to newly created XML with selected
+ *                         parameters as attributes
+ *
+ * \return Newly allocated space-separated string of parameter names
+ * \note Selection criteria varies by param_type: for the restart digest, we
+ *       want parameters that are *not* marked reloadable (OCF 1.1) or that
+ *       *are* marked unique (pre-1.1), for both string and XML results; for
+ *       the secure digest, we want parameters that *are* marked private for
+ *       the string, but parameters that are *not* marked private for the XML.
+ * \note It is the caller's responsibility to free the string return value with
+ *       \p g_string_free() and the XML result with \p free_xml().
+ */
+static GString *
+build_parameter_list(const lrmd_event_data_t *op,
+                     const struct ra_metadata_s *metadata,
+                     enum ra_param_flags_e param_type, xmlNode **result)
+{
+    GString *list = NULL;
+
+    *result = create_xml_node(NULL, XML_TAG_PARAMS);
+
+    /* Consider all parameters except private ones, to be consistent with what
+     * the scheduler does with calculate_secure_digest().
+     */
+    if (param_type == ra_param_private
+        && compare_version(controld_globals.dc_version, "3.16.0") >= 0) {
+        g_hash_table_foreach(op->params, hash2field, *result);
+        pcmk__filter_op_for_digest(*result);
+    }
+
+    for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) {
+        struct ra_param_s *param = (struct ra_param_s *) iter->data;
+
+        bool accept_for_list = false;
+        bool accept_for_xml = false;
+
+        switch (param_type) {
+            case ra_param_reloadable:
+                accept_for_list = !pcmk_is_set(param->rap_flags, param_type);
+                accept_for_xml = accept_for_list;
+                break;
+
+            case ra_param_unique:
+                accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+                accept_for_xml = accept_for_list;
+                break;
+
+            case ra_param_private:
+                accept_for_list = pcmk_is_set(param->rap_flags, param_type);
+                accept_for_xml = !accept_for_list;
+                break;
+        }
+
+        if (accept_for_list) {
+            crm_trace("Attr %s is %s", param->rap_name,
+                      ra_param_flag2text(param_type));
+
+            if (list == NULL) {
+                // We will later search for " WORD ", so start list with a space
+                pcmk__add_word(&list, 256, " ");
+            }
+            pcmk__add_word(&list, 0, param->rap_name);
+
+        } else {
+            crm_trace("Rejecting %s for %s", param->rap_name,
+                      ra_param_flag2text(param_type));
+        }
+
+        if (accept_for_xml) {
+            const char *v = g_hash_table_lookup(op->params, param->rap_name);
+
+            if (v != NULL) {
+                crm_trace("Adding attr %s=%s to the xml result",
+                          param->rap_name, v);
+                crm_xml_add(*result, param->rap_name, v);
+            }
+
+        } else {
+            crm_trace("Removing attr %s from the xml result", param->rap_name);
+            xml_remove_prop(*result, param->rap_name);
+        }
+    }
+
+    if (list != NULL) {
+        // We will later search for " WORD ", so end list with a space
+        pcmk__add_word(&list, 0, " ");
+    }
+    return list;
+}
+
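Why build_parameter_list() pads the list with a leading and trailing space
(editor's sketch in plain C): whole-word membership then reduces to a single
strstr() for " name ", with no tokenizing needed:

#include <stdio.h>
#include <string.h>

int main(void) {
    // A list in the same " a b c " form the function above produces
    const char *force_restart = " ip nic cidr_netmask ";
    const char *param = "nic";
    char needle[64];

    snprintf(needle, sizeof(needle), " %s ", param);
    printf("%s is %sin the restart list\n", param,
           (strstr(force_restart, needle) != NULL)? "" : "not ");
    return 0;
}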
"" : (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); + + if ((list != NULL) && (list->len > 0)) { + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + } else { + crm_trace("%s: %s", op->rsc_id, digest); + } + + if (list != NULL) { + g_string_free(list, TRUE); + } + free_xml(restart); + free(digest); +} + +static void +append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, + xmlNode *update, const char *version) +{ + GString *list = NULL; + char *digest = NULL; + xmlNode *secure = NULL; + + CRM_LOG_ASSERT(op->params != NULL); + + /* + * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the + * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on + * the insecure ones + */ + list = build_parameter_list(op, metadata, ra_param_private, &secure); + + if (list != NULL) { + digest = calculate_operation_digest(secure, version); + crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); + + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + g_string_free(list, TRUE); + } else { + crm_trace("%s: no secure parameters", op->rsc_id); + } + + free_xml(secure); + free(digest); +} + +/*! + * \internal + * \brief Create XML for a resource history entry + * + * \param[in] func Function name of caller + * \param[in,out] parent XML to add entry to + * \param[in] rsc Affected resource + * \param[in,out] op Action to add an entry for (or NULL to do nothing) + * \param[in] node_name Node where action occurred + */ +void +controld_add_resource_history_xml_as(const char *func, xmlNode *parent, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, + const char *node_name) +{ + int target_rc = 0; + xmlNode *xml_op = NULL; + struct ra_metadata_s *metadata = NULL; + const char *caller_version = NULL; + lrm_state_t *lrm_state = NULL; + + if (op == NULL) { + return; + } + + target_rc = rsc_op_expected_rc(op); + + caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); + CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); + + xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, + controld_globals.our_nodename, func); + if (xml_op == NULL) { + return; + } + + if ((rsc == NULL) || (op->params == NULL) + || !crm_op_needs_metadata(rsc->standard, op->op_type)) { + + crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", + op->op_type, op->rsc_id, op->params, rsc); + return; + } + + lrm_state = lrm_state_find(node_name); + if (lrm_state == NULL) { + crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT + " because we have no connection to executor for %s", + op->rsc_id, op->op_type, op->interval_ms, node_name); + return; + } + + /* Ideally the metadata is cached, and the agent is just a fallback. + * + * @TODO Go through all callers and ensure they get metadata asynchronously + * first. + */ + metadata = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_agent + |controld_metadata_from_cache); + if (metadata == NULL) { + return; + } + + crm_trace("Including additional digests for %s:%s:%s", + rsc->standard, rsc->provider, rsc->type); + append_restart_list(op, metadata, xml_op, caller_version); + append_secure_list(op, metadata, xml_op, caller_version); + + return; +} + +/*! 
+/*!
+ * \internal
+ * \brief Record an action as pending in the CIB, if appropriate
+ *
+ * \param[in]     node_name  Node where the action is pending
+ * \param[in]     rsc        Resource that action is for
+ * \param[in,out] op         Pending action
+ *
+ * \return true if action was recorded in CIB, otherwise false
+ */
+bool
+controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc,
+                           lrmd_event_data_t *op)
+{
+    const char *record_pending = NULL;
+
+    CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL),
+              return false);
+
+    // Never record certain operation types as pending
+    if ((op->op_type == NULL) || (op->params == NULL)
+        || !controld_action_is_recordable(op->op_type)) {
+        return false;
+    }
+
+    // Check action's record-pending meta-attribute (defaults to true)
+    record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
+    if ((record_pending != NULL) && !crm_is_true(record_pending)) {
+        return false;
+    }
+
+    op->call_id = -1;
+    op->t_run = time(NULL);
+    op->t_rcchange = op->t_run;
+
+    lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
+
+    crm_debug("Recording pending %s-interval %s for %s on %s in the CIB",
+              pcmk__readable_interval(op->interval_ms), op->op_type,
+              op->rsc_id, node_name);
+    controld_update_resource_history(node_name, rsc, op, 0);
+    return true;
+}
+
+static void
+cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+                 void *user_data)
+{
+    switch (rc) {
+        case pcmk_ok:
+        case -pcmk_err_diff_failed:
+        case -pcmk_err_diff_resync:
+            crm_trace("Resource update %d complete: rc=%d", call_id, rc);
+            break;
+        default:
+            crm_warn("Resource update %d failed: (rc=%d) %s",
+                     call_id, rc, pcmk_strerror(rc));
+    }
+
+    if (call_id == pending_rsc_update) {
+        pending_rsc_update = 0;
+        controld_trigger_fsa();
+    }
+}
+
+/* Only successful stops, and probes that found the resource inactive, get
+ * locks recorded in the history. This ensures the resource stays locked to
+ * the node until it is active there again after the node comes back up.
+ */
+static bool
+should_preserve_lock(lrmd_event_data_t *op)
+{
+    if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+        return false;
+    }
+    if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) {
+        return true;
+    }
+    if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) {
+        return true;
+    }
+    return false;
+}
+
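A stand-alone rendering of the lock-preservation rule above, with the
Pacemaker constants stubbed so the sketch compiles on its own (the stub
values follow OCF conventions but are reproduced here as assumptions):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define RSC_STOP   "stop"     // stubbed action names
#define RSC_STATUS "monitor"
enum { PCMK_OCF_OK = 0, PCMK_OCF_NOT_RUNNING = 7 };  // stubbed OCF codes

static bool preserve_lock(const char *op_type, int rc, bool lock_enabled) {
    if (!lock_enabled) {
        return false;
    }
    if ((strcmp(op_type, RSC_STOP) == 0) && (rc == PCMK_OCF_OK)) {
        return true;   // successful stop
    }
    if ((strcmp(op_type, RSC_STATUS) == 0) && (rc == PCMK_OCF_NOT_RUNNING)) {
        return true;   // probe that found the resource inactive
    }
    return false;
}

int main(void) {
    printf("%d %d %d\n",
           preserve_lock(RSC_STOP, PCMK_OCF_OK, true),             // 1
           preserve_lock(RSC_STOP, 1 /* error */, true),           // 0
           preserve_lock(RSC_STATUS, PCMK_OCF_NOT_RUNNING, true)); // 1
    return 0;
}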
+/*!
+ * \internal
+ * \brief Request a CIB update
+ *
+ * \param[in]     section   Section of CIB to update
+ * \param[in,out] data      New XML of CIB section to update
+ * \param[in]     options   CIB call options
+ * \param[in]     callback  If not NULL, set this as the operation callback
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
+ *       stored in \p pending_rsc_update on success.
+ */
+int
+controld_update_cib(const char *section, xmlNode *data, int options,
+                    void (*callback)(xmlNode *, int, int, xmlNode *, void *))
+{
+    int cib_rc = -ENOTCONN;
+
+    CRM_ASSERT(data != NULL);
+
+    if (controld_globals.cib_conn != NULL) {
+        cib_rc = cib_internal_op(controld_globals.cib_conn,
+                                 PCMK__CIB_REQUEST_MODIFY, NULL, section,
+                                 data, NULL, options, NULL);
+        if (cib_rc >= 0) {
+            crm_debug("Submitted CIB update %d for %s section",
+                      cib_rc, section);
+        }
+    }
+
+    if (callback == NULL) {
+        if (cib_rc < 0) {
+            crm_err("Failed to update CIB %s section: %s",
+                    section, pcmk_rc_str(pcmk_legacy2rc(cib_rc)));
+        }
+
+    } else {
+        if ((cib_rc >= 0) && (callback == cib_rsc_callback)) {
+            /* Checking for a particular callback is a little hacky, but it
+             * didn't seem worth adding an output argument for cib_rc for just
+             * one use case.
+             */
+            pending_rsc_update = cib_rc;
+        }
+        fsa_register_cib_callback(cib_rc, NULL, callback);
+    }
+
+    return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc);
+}
+
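For orientation (editor's sketch, not patch content): the nested
create_xml_node() calls in controld_update_resource_history() below assemble
a status fragment shaped roughly like the following, which the CIB manager
then merges into the live CIB. Attribute lists are abbreviated:

<status>
  <node_state id="1" uname="node1">
    <lrm id="1">
      <lrm_resources>
        <lrm_resource id="my-rsc" class="ocf" provider="heartbeat"
                      type="Dummy">
          <lrm_rsc_op id="my-rsc_last_0" operation="start" ... />
        </lrm_resource>
      </lrm_resources>
    </lrm>
  </node_state>
</status>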
+/*!
+ * \internal
+ * \brief Update resource history entry in CIB
+ *
+ * \param[in]     node_name  Node where action occurred
+ * \param[in]     rsc        Resource that action is for
+ * \param[in,out] op         Action to record
+ * \param[in]     lock_time  If nonzero, when resource was locked to node
+ *
+ * \note On success, the CIB update's call ID will be stored in
+ *       pending_rsc_update.
+ */
+void
+controld_update_resource_history(const char *node_name,
+                                 const lrmd_rsc_info_t *rsc,
+                                 lrmd_event_data_t *op, time_t lock_time)
+{
+    xmlNode *update = NULL;
+    xmlNode *xml = NULL;
+    int call_opt = crmd_cib_smart_opt();
+    const char *node_id = NULL;
+    const char *container = NULL;
+
+    CRM_CHECK((node_name != NULL) && (op != NULL), return);
+
+    if (rsc == NULL) {
+        crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
+        controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
+        return;
+    }
+
+    // <status>
+    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+
+    // <node_state ...>
+    xml = create_xml_node(update, XML_CIB_TAG_STATE);
+    if (pcmk__str_eq(node_name, controld_globals.our_nodename,
+                     pcmk__str_casei)) {
+        node_id = controld_globals.our_uuid;
+    } else {
+        node_id = node_name;
+        pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true);
+    }
+    crm_xml_add(xml, XML_ATTR_ID, node_id);
+    crm_xml_add(xml, XML_ATTR_UNAME, node_name);
+    crm_xml_add(xml, XML_ATTR_ORIGIN, __func__);
+
+    // <lrm ...>
+    xml = create_xml_node(xml, XML_CIB_TAG_LRM);
+    crm_xml_add(xml, XML_ATTR_ID, node_id);
+
+    // <lrm_resources>
+    xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES);
+
+    // <lrm_resource ...>
+    xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE);
+    crm_xml_add(xml, XML_ATTR_ID, op->rsc_id);
+    crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard);
+    crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider);
+    crm_xml_add(xml, XML_ATTR_TYPE, rsc->type);
+    if (lock_time != 0) {
+        /* Actions on a locked resource should either preserve the lock by
+         * recording it with the action result, or clear it.
+         */
+        if (!should_preserve_lock(op)) {
+            lock_time = 0;
+        }
+        crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+                       (long long) lock_time);
+    }
+    if (op->params != NULL) {
+        container = g_hash_table_lookup(op->params,
+                                        CRM_META "_" XML_RSC_ATTR_CONTAINER);
+        if (container != NULL) {
+            crm_trace("Resource %s is a part of container resource %s",
+                      op->rsc_id, container);
+            crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container);
+        }
+    }
+
+    // <lrm_rsc_op ...> (possibly more than one)
+    controld_add_resource_history_xml(xml, rsc, op, node_name);
+
+    /* Update CIB asynchronously. Even if it fails, the resource state should
+     * be discovered during the next election. Worst case, the node is wrongly
+     * fenced for running a resource it isn't.
+     */
+    crm_log_xml_trace(update, __func__);
+    controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt,
+                        cib_rsc_callback);
+    free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Erase an LRM history entry from the CIB, given the operation data
+ *
+ * \param[in] op  Operation whose history should be deleted
+ */
+void
+controld_delete_action_history(const lrmd_event_data_t *op)
+{
+    xmlNode *xml_top = NULL;
+
+    CRM_CHECK(op != NULL, return);
+
+    xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
+    crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
+    crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
+
+    if (op->interval_ms > 0) {
+        char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
+
+        /* Avoid deleting last_failure too (if it was a result of this
+         * recurring op failing)
+         */
+        crm_xml_add(xml_top, XML_ATTR_ID, op_id);
+        free(op_id);
+    }
+
+    crm_debug("Erasing resource operation history for " PCMK__OP_FMT
+              " (call=%d)",
+              op->rsc_id, op->op_type, op->interval_ms, op->call_id);
+
+    controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn,
+                                            XML_CIB_TAG_STATUS, xml_top,
+                                            cib_none);
+
+    crm_log_xml_trace(xml_top, "op:cancel");
+    free_xml(xml_top);
+}
+
+/* Define XPath to find LRM resource history entry by node and resource */
+#define XPATH_HISTORY                                       \
+    "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS                  \
+    "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"      \
+    "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES           \
+    "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']"      \
+    "/" XML_LRM_TAG_RSC_OP
+
+/* ... and also by operation key */
+#define XPATH_HISTORY_ID XPATH_HISTORY \
+    "[@" XML_ATTR_ID "='%s']"
+
+/* ... and also by operation key and operation call ID */
+#define XPATH_HISTORY_CALL XPATH_HISTORY \
+    "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
+
+/* ... and also by operation key and original operation key */
+#define XPATH_HISTORY_ORIG XPATH_HISTORY \
+    "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
+
+/*!
+ * \internal
+ * \brief Delete a last_failure resource history entry from the CIB
+ *
+ * \param[in] rsc_id       Name of resource to clear history for
+ * \param[in] node         Name of node to clear history for
+ * \param[in] action       If specified, delete only if this was failed action
+ * \param[in] interval_ms  If \p action is specified, it has this interval
+ */
+void
+controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+                                 const char *action, guint interval_ms)
+{
+    char *xpath = NULL;
+    char *last_failure_key = NULL;
+
+    CRM_CHECK((rsc_id != NULL) && (node != NULL), return);
+
+    // Generate XPath to match desired entry
+    last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
+    if (action == NULL) {
+        xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id,
+                                  last_failure_key);
+    } else {
+        char *action_key = pcmk__op_key(rsc_id, action, interval_ms);
+
+        xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id,
+                                  last_failure_key, action_key);
+        free(action_key);
+    }
+    free(last_failure_key);
+
+    controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+                                            NULL, cib_xpath);
+    free(xpath);
+}
+
+/*!
+ * \internal
+ * \brief Delete resource history entry from the CIB, given operation key
+ *
+ * \param[in] rsc_id   Name of resource to clear history for
+ * \param[in] node     Name of node to clear history for
+ * \param[in] key      Operation key of operation to clear history for
+ * \param[in] call_id  If specified, delete entry only if it has this call ID
+ */
+void
+controld_delete_action_history_by_key(const char *rsc_id, const char *node,
+                                      const char *key, int call_id)
+{
+    char *xpath = NULL;
+
+    CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return);
+
+    if (call_id > 0) {
+        xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
+                                  call_id);
+    } else {
+        xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
+    }
+    controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
+                                            NULL, cib_xpath);
+    free(xpath);
+}
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
new file mode 100644
index 0000000..bd9492a
--- /dev/null
+++ b/daemons/controld/controld_cib.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__CONTROLD_CIB__H
+#define PCMK__CONTROLD_CIB__H
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/cib/internal.h>   // PCMK__CIB_REQUEST_MODIFY
+#include "controld_globals.h"   // controld_globals.cib_conn
+
+static inline void
+fsa_cib_anon_update(const char *section, xmlNode *data) {
+    if (controld_globals.cib_conn == NULL) {
+        crm_err("No CIB connection available");
+    } else {
+        controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+                                                section, data,
+                                                cib_scope_local
+                                                |cib_can_create);
+    }
+}
+
+static inline void
+fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
+    if (controld_globals.cib_conn == NULL) {
+        crm_err("No CIB connection available");
+    } else {
+        controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
+                                                section, data,
+                                                cib_scope_local
+                                                |cib_can_create
+                                                |cib_discard_reply);
+    }
+}
+
+void controld_record_cib_replace_call(int call_id);
+bool controld_forget_cib_replace_call(int call_id);
+void controld_forget_all_cib_replace_calls(void);
+void controld_destroy_cib_replacements_table(void);
+
+int controld_update_cib(const char *section, xmlNode *data, int options,
+                        void (*callback)(xmlNode *, int, int, xmlNode *,
+                                         void *));
+unsigned int cib_op_timeout(void);
+
+// Subsections of node_state
+enum controld_section_e {
+    controld_section_lrm,
+    controld_section_lrm_unlocked,
+    controld_section_attrs,
+    controld_section_all,
+    controld_section_all_unlocked
+};
+
+void controld_delete_node_state(const char *uname,
+                                enum controld_section_e section, int options);
+int controld_delete_resource_history(const char *rsc_id, const char *node,
+                                     const char *user_name, int call_options);
+
+/* Convenience macro for registering a CIB callback
+ * (assumes that data can be freed with free())
+ */
+# define fsa_register_cib_callback(id, data, fn) do {                      \
+        cib_t *cib_conn = controld_globals.cib_conn;                       \
+                                                                           \
+        CRM_ASSERT(cib_conn != NULL);                                      \
+        cib_conn->cmds->register_callback_full(cib_conn, id,               \
+                                               cib_op_timeout(),           \
+                                               FALSE, data, #fn, fn,       \
+                                               free);                      \
+    } while(0)
+
+void controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
+                                          const lrmd_rsc_info_t *rsc,
+                                          lrmd_event_data_t *op,
+                                          const char *node_name);
+
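A hypothetical caller, to show how the declarations above combine in practice
(the wrapper function name and the choice of cib_scope_local are illustrative
only, not taken from the patch):

#include "controld_cib.h"

// Clear a departed node's transient attributes via an XPath delete
static void
forget_node_attrs(const char *uname)
{
    controld_delete_node_state(uname, controld_section_attrs,
                               cib_scope_local);
}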
+#define controld_add_resource_history_xml(parent, rsc, op, node_name)      \
+        controld_add_resource_history_xml_as(__func__, (parent), (rsc),    \
+                                             (op), (node_name))
+
+bool controld_record_pending_op(const char *node_name,
+                                const lrmd_rsc_info_t *rsc,
+                                lrmd_event_data_t *op);
+
+void controld_update_resource_history(const char *node_name,
+                                      const lrmd_rsc_info_t *rsc,
+                                      lrmd_event_data_t *op,
+                                      time_t lock_time);
+
+void controld_delete_action_history(const lrmd_event_data_t *op);
+
+void controld_cib_delete_last_failure(const char *rsc_id, const char *node,
+                                      const char *action, guint interval_ms);
+
+void controld_delete_action_history_by_key(const char *rsc_id,
+                                           const char *node,
+                                           const char *key, int call_id);
+
+void controld_disconnect_cib_manager(void);
+
+int crmd_cib_smart_opt(void);
+
+/*!
+ * \internal
+ * \brief Check whether an action type should be recorded in the CIB
+ *
+ * \param[in] action  Action type
+ *
+ * \return true if action should be recorded, false otherwise
+ */
+static inline bool
+controld_action_is_recordable(const char *action)
+{
+    return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE,
+                             CRMD_ACTION_NOTIFY, CRMD_ACTION_METADATA, NULL);
+}
+
+#endif // PCMK__CONTROLD_CIB__H
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
new file mode 100644
index 0000000..ffc62a0
--- /dev/null
+++ b/daemons/controld/controld_control.c
@@ -0,0 +1,857 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <string.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/pengine/rules.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/common/ipc_internal.h>
+
+#include <pacemaker-controld.h>
+
+static qb_ipcs_service_t *ipcs = NULL;
+
+static crm_trigger_t *config_read_trigger = NULL;
+
+#if SUPPORT_COROSYNC
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+#endif
+
+void crm_shutdown(int nsig);
+static gboolean crm_read_options(gpointer user_data);
+
+/* A_HA_CONNECT */
+void
+do_ha_control(long long action,
+              enum crmd_fsa_cause cause,
+              enum crmd_fsa_state cur_state,
+              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    gboolean registered = FALSE;
+    static crm_cluster_t *cluster = NULL;
+
+    if (cluster == NULL) {
+        cluster = pcmk_cluster_new();
+    }
+
+    if (action & A_HA_DISCONNECT) {
+        crm_cluster_disconnect(cluster);
+        crm_info("Disconnected from the cluster");
+
+        controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+    }
+
+    if (action & A_HA_CONNECT) {
+        crm_set_status_callback(&peer_update_callback);
+        crm_set_autoreap(FALSE);
+
+#if SUPPORT_COROSYNC
+        if (is_corosync_cluster()) {
+            registered = crm_connect_corosync(cluster);
+        }
+#endif // SUPPORT_COROSYNC
+
+        if (registered) {
+            controld_election_init(cluster->uname);
+            controld_globals.our_nodename = cluster->uname;
+            controld_globals.our_uuid = cluster->uuid;
+            if (cluster->uuid == NULL) {
+                crm_err("Could not obtain local uuid");
+                registered = FALSE;
+            }
+        }
+
+        if (!registered) {
+            controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+            return;
+        }
+
+        populate_cib_nodes(node_update_none, __func__);
+        controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
+        crm_info("Connected to the cluster");
+    }
+
+    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
+        crm_err("Unexpected action %s in %s", fsa_action2string(action),
+                __func__);
+    }
+}
+
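The do_*() handlers in this file all follow the FSA convention visible in
do_ha_control() above: action is a bitmask, and a handler services whichever
of its bits are set, complaining about leftovers. A minimal stand-alone model
of that dispatch (bit values are illustrative, not Pacemaker's):

#include <stdio.h>

#define A_HA_CONNECT    0x1LL   // illustrative values
#define A_HA_DISCONNECT 0x2LL

static void handle(long long action) {
    if (action & A_HA_DISCONNECT) {
        printf("disconnect\n");
    }
    if (action & A_HA_CONNECT) {
        printf("connect\n");
    }
    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
        printf("unexpected action bits\n");
    }
}

int main(void) {
    handle(A_HA_CONNECT);                    // connect only
    handle(A_HA_CONNECT | A_HA_DISCONNECT);  // both, disconnect handled first
    return 0;
}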
+/* A_SHUTDOWN */
+void
+do_shutdown(long long action,
+            enum crmd_fsa_cause cause,
+            enum crmd_fsa_state cur_state,
+            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    /* just in case */
+    controld_set_fsa_input_flags(R_SHUTDOWN);
+    controld_disconnect_fencer(FALSE);
+}
+
+/* A_SHUTDOWN_REQ */
+void
+do_shutdown_req(long long action,
+                enum crmd_fsa_cause cause,
+                enum crmd_fsa_state cur_state,
+                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    xmlNode *msg = NULL;
+
+    controld_set_fsa_input_flags(R_SHUTDOWN);
+    //controld_set_fsa_input_flags(R_STAYDOWN);
+    crm_info("Sending shutdown request to all peers (DC is %s)",
+             pcmk__s(controld_globals.dc_name, "not set"));
+    msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD,
+                         CRM_SYSTEM_CRMD, NULL);
+
+    if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+    }
+    free_xml(msg);
+}
+
+void
+crmd_fast_exit(crm_exit_t exit_code)
+{
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
+        crm_warn("Inhibiting respawn " CRM_XS " remapping exit code %d to %d",
+                 exit_code, CRM_EX_FATAL);
+        exit_code = CRM_EX_FATAL;
+
+    } else if ((exit_code == CRM_EX_OK)
+               && pcmk_is_set(controld_globals.fsa_input_register,
+                              R_IN_RECOVERY)) {
+        crm_err("Could not recover from internal error");
+        exit_code = CRM_EX_ERROR;
+    }
+
+    if (controld_globals.logger_out != NULL) {
+        controld_globals.logger_out->finish(controld_globals.logger_out,
+                                            exit_code, true, NULL);
+        pcmk__output_free(controld_globals.logger_out);
+        controld_globals.logger_out = NULL;
+    }
+
+    crm_exit(exit_code);
+}
+
+crm_exit_t
+crmd_exit(crm_exit_t exit_code)
+{
+    GMainLoop *mloop = controld_globals.mainloop;
+
+    static bool in_progress = FALSE;
+
+    if (in_progress && (exit_code == CRM_EX_OK)) {
+        crm_debug("Exit is already in progress");
+        return exit_code;
+
+    } else if (in_progress) {
+        crm_notice("Error during shutdown process, exiting now with status "
+                   "%d (%s)", exit_code, crm_exit_str(exit_code));
+        crm_write_blackbox(SIGTRAP, NULL);
+        crmd_fast_exit(exit_code);
+    }
+
+    in_progress = TRUE;
+    crm_trace("Preparing to exit with status %d (%s)",
+              exit_code, crm_exit_str(exit_code));
+
+    /* Suppress secondary errors resulting from us disconnecting everything */
+    controld_set_fsa_input_flags(R_HA_DISCONNECTED);
+
+    /* Close all IPC servers and clients to ensure any and all shared memory
+     * files are cleaned up
+     */
+    if (ipcs) {
+        crm_trace("Closing IPC server");
+        mainloop_del_ipc_server(ipcs);
+        ipcs = NULL;
+    }
+
+    controld_close_attrd_ipc();
+    controld_shutdown_schedulerd_ipc();
+    controld_disconnect_fencer(TRUE);
+
+    if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
+        crm_debug("No mainloop detected");
+        exit_code = CRM_EX_ERROR;
+    }
+
+    /* On an error, just get out.
+     *
+     * Otherwise, make the effort to have the mainloop exit gracefully, so
+     * that it (mostly) cleans up after itself and valgrind has less to report
+     * on, allowing real errors to stand out.
+     */
+    if (exit_code != CRM_EX_OK) {
+        crm_notice("Forcing immediate exit with status %d (%s)",
+                   exit_code, crm_exit_str(exit_code));
+        crm_write_blackbox(SIGTRAP, NULL);
+        crmd_fast_exit(exit_code);
+    }
+
+    /* Clean up as much memory as possible for valgrind */
+
+    for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+         iter = iter->next) {
+        fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
+
+        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
+                 fsa_input2string(fsa_data->fsa_input),
+                 fsa_state2string(controld_globals.fsa_state),
+                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
+        delete_fsa_input(fsa_data);
+    }
+
+    controld_clear_fsa_input_flags(R_MEMBERSHIP);
+
+    g_list_free(controld_globals.fsa_message_queue);
+    controld_globals.fsa_message_queue = NULL;
+
+    controld_election_fini();
+
+    /* Tear down the CIB manager connection, but don't free it yet -- it could
+     * be used when we drain the mainloop later.
+     */
+    controld_disconnect_cib_manager();
+
+    verify_stopped(controld_globals.fsa_state, LOG_WARNING);
+    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
+    lrm_state_destroy_all();
+
+    mainloop_destroy_trigger(config_read_trigger);
+    config_read_trigger = NULL;
+
+    controld_destroy_fsa_trigger();
+    controld_destroy_transition_trigger();
+
+    pcmk__client_cleanup();
+    crm_peer_destroy();
+
+    controld_free_fsa_timers();
+    te_cleanup_stonith_history_sync(NULL, TRUE);
+    controld_free_sched_timer();
+
+    free(controld_globals.our_nodename);
+    controld_globals.our_nodename = NULL;
+
+    free(controld_globals.our_uuid);
+    controld_globals.our_uuid = NULL;
+
+    free(controld_globals.dc_name);
+    controld_globals.dc_name = NULL;
+
+    free(controld_globals.dc_version);
+    controld_globals.dc_version = NULL;
+
+    free(controld_globals.cluster_name);
+    controld_globals.cluster_name = NULL;
+
+    free(controld_globals.te_uuid);
+    controld_globals.te_uuid = NULL;
+
+    free_max_generation();
+    controld_destroy_cib_replacements_table();
+    controld_destroy_failed_sync_table();
+    controld_destroy_outside_events_table();
+
+    mainloop_destroy_signal(SIGPIPE);
+    mainloop_destroy_signal(SIGUSR1);
+    mainloop_destroy_signal(SIGTERM);
+    mainloop_destroy_signal(SIGTRAP);
+    /* leave SIGCHLD engaged as we might still want to drain some
+     * service-actions
+     */
+
+    if (mloop) {
+        GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
+
+        /* Don't re-enter this block */
+        controld_globals.mainloop = NULL;
+
+        /* no signals on final draining anymore */
+        mainloop_destroy_signal(SIGCHLD);
+
+        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop),
+                  g_main_context_pending(ctx));
+
+        {
+            int lpc = 0;
+
+            while ((g_main_context_pending(ctx) && lpc < 10)) {
+                lpc++;
+                crm_trace("Iteration %d", lpc);
+                g_main_context_dispatch(ctx);
+            }
+        }
+
+        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop),
+                  g_main_context_pending(ctx));
+        g_main_loop_quit(mloop);
+
+        /* Won't do anything yet, since we're inside it now */
+        g_main_loop_unref(mloop);
+    } else {
+        mainloop_destroy_signal(SIGCHLD);
+    }
+
+    cib_delete(controld_globals.cib_conn);
+    controld_globals.cib_conn = NULL;
+
+    throttle_fini();
+
+    /* Graceful */
+    crm_trace("Done preparing for exit with status %d (%s)",
+              exit_code, crm_exit_str(exit_code));
+    return exit_code;
+}
+
+/* A_EXIT_0, A_EXIT_1 */
+void
+do_exit(long long action,
+        enum crmd_fsa_cause cause,
+        enum crmd_fsa_state cur_state,
+        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    crm_exit_t exit_code = CRM_EX_OK;
+    int log_level = LOG_INFO;
+    const char *exit_type = "gracefully";
+
+    if (action & A_EXIT_1) {
+        log_level = LOG_ERR;
+        exit_type = "forcefully";
+        exit_code = CRM_EX_ERROR;
+    }
+
+    verify_stopped(cur_state, LOG_ERR);
+    do_crm_log(log_level, "Performing %s - %s exiting the controller",
+               fsa_action2string(action), exit_type);
+
+    crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
+    crmd_exit(exit_code);
+}
+
+static void sigpipe_ignore(int nsig) { return; }
+
+/* A_STARTUP */
+void
+do_startup(long long action,
+           enum crmd_fsa_cause cause,
+           enum crmd_fsa_state cur_state,
+           enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    crm_debug("Registering Signal Handlers");
+    mainloop_add_signal(SIGTERM, crm_shutdown);
+    mainloop_add_signal(SIGPIPE, sigpipe_ignore);
+
+    config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
+                                               crm_read_options, NULL);
+
+    controld_init_fsa_trigger();
+    controld_init_transition_trigger();
+
+    crm_debug("Creating CIB manager and executor objects");
+    controld_globals.cib_conn = cib_new();
+
+    lrm_state_init_local();
+    if (controld_init_fsa_timers() == FALSE) {
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+    }
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
+{
+    crm_trace("Accepting new IPC client connection");
+    if (pcmk__new_client(c, uid, gid) == NULL) {
+        return -EIO;
+    }
+    return 0;
+}
+
+// \return libqb error code (0 on success, -errno on error)
+static int32_t
+dispatch_controller_ipc(qb_ipcs_connection_t *c, void *data, size_t size)
+{
+    uint32_t id = 0;
+    uint32_t flags = 0;
+    pcmk__client_t *client = pcmk__find_client(c);
+
+    xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
+
+    if (msg == NULL) {
+        pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
+        return 0;
+    }
+    pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
+
+    CRM_ASSERT(client->user != NULL);
+    pcmk__update_acl_user(msg, F_CRM_USER, client->user);
+
+    crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
+    if (controld_authorize_ipc_message(msg, client, NULL)) {
+        crm_trace("Processing IPC message from client %s",
+                  pcmk__client_name(client));
+        route_message(C_IPC_MESSAGE, msg);
+    }
+
+    controld_trigger_fsa();
+    free_xml(msg);
+    return 0;
+}
+
+static int32_t
+ipc_client_disconnected(qb_ipcs_connection_t *c)
+{
+    pcmk__client_t *client = pcmk__find_client(c);
+
+    if (client) {
+        crm_trace("Disconnecting %sregistered client %s (%p/%p)",
+                  (client->userdata? "" : "un"), pcmk__client_name(client),
+                  c, client);
"" : "un"), pcmk__client_name(client), + c, client); + free(client->userdata); + pcmk__free_client(client); + controld_trigger_fsa(); + } + return 0; +} + +static void +ipc_connection_destroyed(qb_ipcs_connection_t *c) +{ + crm_trace("Connection %p", c); + ipc_client_disconnected(c); +} + +/* A_STOP */ +void +do_stop(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); ipcs = NULL; + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); +} + +/* A_STARTED */ +void +do_started(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + static struct qb_ipcs_service_handlers crmd_callbacks = { + .connection_accept = accept_controller_client, + .connection_created = NULL, + .msg_process = dispatch_controller_ipc, + .connection_closed = ipc_client_disconnected, + .connection_destroyed = ipc_connection_destroyed + }; + + if (cur_state != S_STARTING) { + crm_err("Start cancelled... %s", fsa_state2string(cur_state)); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_MEMBERSHIP)) { + crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_LRM_CONNECTED)) { + crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_CIB_CONNECTED)) { + crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, + R_READ_CONFIG)) { + crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); + + crmd_fsa_stall(TRUE); + return; + + } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { + + crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); + crmd_fsa_stall(TRUE); + return; + } + + crm_debug("Init server comms"); + ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); + if (ipcs == NULL) { + crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } else { + crm_notice("Pacemaker controller successfully started and accepting connections"); + } + controld_trigger_fencer_connect(); + + controld_clear_fsa_input_flags(R_STARTING); + register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); +} + +/* A_RECOVER */ +void +do_recover(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + controld_set_fsa_input_flags(R_IN_RECOVERY); + crm_warn("Fast-tracking shutdown in response to errors"); + + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); +} + +static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, + * short description, + * long description + */ + { + "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, + N_("Pacemaker version on cluster node elected Designated Controller (DC)"), + N_("Includes a hash which identifies the exact changeset the code was " + "built from. 
+static pcmk__cluster_option_t controller_options[] = {
+    /* name, old name, type, allowed values,
+     * default value, validator,
+     * short description,
+     * long description
+     */
+    {
+        "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
+        N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
+        N_("Includes a hash which identifies the exact changeset the code was "
+           "built from. Used for diagnostic purposes.")
+    },
+    {
+        "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
+        N_("The messaging stack on which Pacemaker is currently running"),
+        N_("Used for informational and diagnostic purposes.")
+    },
+    {
+        "cluster-name", NULL, "string", NULL, NULL, NULL,
+        N_("An arbitrary name for the cluster"),
+        N_("This optional value is mostly for users' convenience as desired "
+           "in administration, but may also be used in Pacemaker "
+           "configuration rules via the #cluster-name node attribute, and "
+           "by higher-level tools and resource agents.")
+    },
+    {
+        XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
+        NULL, "20s", pcmk__valid_interval_spec,
+        N_("How long to wait for a response from other nodes during start-up"),
+        N_("The optimal value will depend on the speed and load of your "
+           "network and the type of switches used.")
+    },
+    {
+        XML_CONFIG_ATTR_RECHECK, NULL, "time",
+        N_("Zero disables polling, while positive values are an interval in "
+           "seconds (unless other units are specified, for example \"5min\")"),
+        "15min", pcmk__valid_interval_spec,
+        N_("Polling interval to recheck cluster state and evaluate rules "
+           "with date specifications"),
+        N_("Pacemaker is primarily event-driven, and looks ahead to know when "
+           "to recheck cluster state for failure timeouts and most time-based "
+           "rules. However, it will also recheck the cluster after this "
+           "amount of inactivity, to evaluate rules with date specifications "
+           "and serve as a fail-safe for certain types of scheduler bugs.")
+    },
+    {
+        "load-threshold", NULL, "percentage", NULL,
+        "80%", pcmk__valid_percentage,
+        N_("Maximum amount of system load that should be used by cluster "
+           "nodes"),
+        N_("The cluster will slow down its recovery process when the amount "
+           "of system resources used (currently CPU) approaches this limit"),
+    },
+    {
+        "node-action-limit", NULL, "integer", NULL,
+        "0", pcmk__valid_number,
+        N_("Maximum number of jobs that can be scheduled per node "
+           "(defaults to 2x cores)")
+    },
+    {
+        XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
+        N_("How a cluster node should react if notified of its own fencing"),
+        N_("A cluster node may receive notification of its own fencing if "
+           "fencing is misconfigured, or if fabric fencing is in use that "
+           "doesn't cut cluster communication. Allowed values are \"stop\" "
+           "to attempt to immediately stop Pacemaker and stay stopped, or "
+           "\"panic\" to attempt to immediately reboot the local node, "
+           "falling back to stop on failure.")
+    },
+    {
+        XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
+        "2min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("Declare an election failed if it is not decided within this much "
+           "time. If you need to adjust this value, it probably indicates "
+           "the presence of a bug.")
+    },
+    {
+        XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
+        "20min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("Exit immediately if shutdown does not complete within this much "
+           "time. If you need to adjust this value, it probably indicates "
+           "the presence of a bug.")
+    },
+    {
+        "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
+        "3min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("If you need to adjust this value, it probably indicates "
+           "the presence of a bug.")
+    },
+    {
+        "join-finalization-timeout", "crmd-finalization-timeout", "time",
+        NULL, "30min", pcmk__valid_interval_spec,
+        "*** Advanced Use Only ***",
+        N_("If you need to adjust this value, it probably indicates "
+           "the presence of a bug.")
+    },
+    {
+        "transition-delay", "crmd-transition-delay", "time", NULL,
+        "0s", pcmk__valid_interval_spec,
+        N_("*** Advanced Use Only *** Enabling this option will slow down "
+           "cluster recovery under all conditions"),
+        N_("Delay cluster recovery for this much time to allow for additional "
+           "events to occur. Useful if your configuration is sensitive to "
+           "the order in which ping updates arrive.")
+    },
+    {
+        "stonith-watchdog-timeout", NULL, "time", NULL,
+        "0", controld_verify_stonith_watchdog_timeout,
+        N_("How long before nodes can be assumed to be safely down when "
+           "watchdog-based self-fencing via SBD is in use"),
+        N_("If this is set to a positive value, lost nodes are assumed to "
+           "self-fence using watchdog-based SBD within this much time. This "
+           "does not require a fencing resource to be explicitly configured, "
+           "though a fence_watchdog resource can be configured, to limit use "
+           "to specific nodes. If this is set to 0 (the default), the cluster "
+           "will never assume watchdog-based self-fencing. If this is set to "
+           "a negative value, the cluster will use twice the local value of "
+           "the `SBD_WATCHDOG_TIMEOUT` environment variable if that is "
+           "positive, or otherwise treat this as 0. WARNING: When used, this "
+           "timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes "
+           "that use watchdog-based SBD, and Pacemaker will refuse to start "
+           "on any of those nodes where this is not true for the local value "
+           "or SBD is not active. When this is set to a negative value, "
+           "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all "
+           "nodes that use SBD, otherwise data corruption or loss could "
+           "occur.")
+    },
+    {
+        "stonith-max-attempts", NULL, "integer", NULL,
+        "10", pcmk__valid_positive_number,
+        N_("How many times fencing can fail before it will no longer be "
+           "immediately re-attempted on a target")
+    },
+
+    // Already documented in libpe_status (other values must be kept identical)
+    {
+        "no-quorum-policy", NULL, "select",
+        "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
+        N_("What to do when the cluster does not have quorum"), NULL
+    },
+    {
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
+        "false", pcmk__valid_boolean,
+        N_("Whether to lock resources to a cleanly shut down node"),
+        N_("When true, resources active on a node when it is cleanly shut "
+           "down are kept \"locked\" to that node (not allowed to run "
+           "elsewhere) until they start again on that node after it rejoins "
+           "(or for at most shutdown-lock-limit, if set). Stonith resources "
+           "and Pacemaker Remote connections are never locked. "
+           "Clone and bundle instances and the promoted role of promotable "
+           "clones are currently never locked, though support could be added "
+           "in a future release.")
+    },
+    {
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
+        "0", pcmk__valid_interval_spec,
+        N_("Do not lock resources to a cleanly shut down node longer than "
+           "this"),
+        N_("If shutdown-lock is true and this is set to a nonzero time "
+           "duration, shutdown locks will expire after this much time has "
+           "passed since the shutdown was initiated, even if the node has "
+           "not rejoined.")
+    },
+};
+
+void
+crmd_metadata(void)
+{
+    const char *desc_short = "Pacemaker controller options";
+    const char *desc_long = "Cluster options used by Pacemaker's controller";
+
+    gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
+                                            desc_long, controller_options,
+                                            PCMK__NELEM(controller_options));
+    printf("%s", s);
+    g_free(s);
+}
+
+static void
+config_query_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+                      void *user_data)
+{
+    const char *value = NULL;
+    GHashTable *config_hash = NULL;
+    crm_time_t *now = crm_time_new(NULL);
+    xmlNode *crmconfig = NULL;
+    xmlNode *alerts = NULL;
+
+    if (rc != pcmk_ok) {
+        fsa_data_t *msg_data = NULL;
+
+        crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+
+        if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
+            crm_err("The cluster is mis-configured - shutting down and "
+                    "staying down");
+            controld_set_fsa_input_flags(R_STAYDOWN);
+        }
+        goto bail;
+    }
+
+    crmconfig = output;
+    if ((crmconfig != NULL)
+        && (crm_element_name(crmconfig) != NULL)
+        && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
+        crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
+    }
+    if (!crmconfig) {
+        fsa_data_t *msg_data = NULL;
+
+        crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+        goto bail;
+    }
+
+    crm_debug("Call %d : Parsing CIB options", call_id);
+    config_hash = pcmk__strkey_table(free, free);
+    pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
+                      config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
+
+    // Validate all options, and use defaults if not already present in hash
+    pcmk__validate_cluster_options(config_hash, controller_options,
+                                   PCMK__NELEM(controller_options));
+
+    value = g_hash_table_lookup(config_hash, "no-quorum-policy");
+    if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
+        controld_set_global_flags(controld_no_quorum_suicide);
+    }
+
+    value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
+    if (crm_is_true(value)) {
+        controld_set_global_flags(controld_shutdown_lock_enabled);
+    } else {
+        controld_clear_global_flags(controld_shutdown_lock_enabled);
+    }
+
+    value = g_hash_table_lookup(config_hash,
+                                XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
+    controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
+                                           / 1000;
+
+    value = g_hash_table_lookup(config_hash, "cluster-name");
+    pcmk__str_update(&(controld_globals.cluster_name), value);
+
+    // Let subcomponents initialize their own static variables
+    controld_configure_election(config_hash);
+    controld_configure_fencing(config_hash);
+    controld_configure_fsa_timers(config_hash);
+    controld_configure_throttle(config_hash);
+
+    alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
+    crmd_unpack_alerts(alerts);
+
+    controld_set_fsa_input_flags(R_READ_CONFIG);
+    controld_trigger_fsa();
+
+    g_hash_table_destroy(config_hash);
+  bail:
+    crm_time_free(now);
+}
+
+/*!
+ * \internal
+ * \brief Trigger read and processing of the configuration
+ *
+ * \param[in] fn    Calling function name
+ * \param[in] line  Line number where call occurred
+ */
+void
+controld_trigger_config_as(const char *fn, int line)
+{
+    if (config_read_trigger != NULL) {
+        crm_trace("%s:%d - Triggered config processing", fn, line);
+        mainloop_set_trigger(config_read_trigger);
+    }
+}
+
+gboolean
+crm_read_options(gpointer user_data)
+{
+    cib_t *cib_conn = controld_globals.cib_conn;
+    int call_id = cib_conn->cmds->query(cib_conn,
+                                        "//" XML_CIB_TAG_CRMCONFIG
+                                        " | //" XML_CIB_TAG_ALERTS,
+                                        NULL, cib_xpath|cib_scope_local);
+
+    fsa_register_cib_callback(call_id, NULL, config_query_callback);
+    crm_trace("Querying the CIB... call %d", call_id);
+    return TRUE;
+}
+
+/* A_READCONFIG */
+void
+do_read_config(long long action,
+               enum crmd_fsa_cause cause,
+               enum crmd_fsa_state cur_state,
+               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    throttle_init();
+    controld_trigger_config();
+}
+
+void
+crm_shutdown(int nsig)
+{
+    const char *value = NULL;
+    guint default_period_ms = 0;
+
+    if ((controld_globals.mainloop == NULL)
+        || !g_main_loop_is_running(controld_globals.mainloop)) {
+        crmd_exit(CRM_EX_OK);
+        return;
+    }
+
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+        crm_err("Escalating shutdown");
+        register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
+        return;
+    }
+
+    controld_set_fsa_input_flags(R_SHUTDOWN);
+    register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+
+    /* If the shutdown timer doesn't have a period set, use the default
+     *
+     * @TODO: Evaluate whether this is still necessary. As long as
+     * config_query_callback() has been run at least once, it doesn't look
+     * like anything could have changed the timer period since then.
+     */
+    value = pcmk__cluster_option(NULL, controller_options,
+                                 PCMK__NELEM(controller_options),
+                                 XML_CONFIG_ATTR_FORCE_QUIT);
+    default_period_ms = crm_parse_interval_spec(value);
+    controld_shutdown_start_countdown(default_period_ms);
+}
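crm_shutdown() above leans on crm_parse_interval_spec() to turn specs like
"20s" or "15min" into milliseconds. A stand-alone approximation of just those
two forms (the real parser also accepts ISO 8601 durations and more units;
treating a bare number as seconds here is an assumption):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long parse_interval_ms(const char *spec) {
    char *end = NULL;
    long value = strtol(spec, &end, 10);

    if (strcmp(end, "min") == 0) {
        return value * 60 * 1000;
    }
    if ((strcmp(end, "s") == 0) || (*end == '\0')) {
        return value * 1000;
    }
    return -1;  // unit not handled by this sketch
}

int main(void) {
    printf("%ld %ld\n", parse_interval_ms("20s"), parse_interval_ms("15min"));
    // Expected: 20000 900000
    return 0;
}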
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
new file mode 100644
index 0000000..4378b30
--- /dev/null
+++ b/daemons/controld/controld_corosync.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/cluster/internal.h>
+#include <crm/common/xml.h>
+
+#include <pacemaker-controld.h>
+
+#if SUPPORT_COROSYNC
+
+extern void post_cache_update(int seq);
+
+/* A_HA_CONNECT */
+
+static void
+crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
+                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+    uint32_t kind = 0;
+    const char *from = NULL;
+    char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind,
+                                        &from);
+
+    if (data == NULL) {
+        return;
+    }
+    if (kind == crm_class_cluster) {
+        crm_node_t *peer = NULL;
+        xmlNode *xml = string2xml(data);
+
+        if (xml == NULL) {
+            crm_err("Could not parse message content (%d): %.100s", kind,
+                    data);
+            free(data);
+            return;
+        }
+
+        crm_xml_add(xml, F_ORIG, from);
+        /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */
+
+        peer = crm_get_peer(0, from);
+        if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
+            /* If we can still talk to our peer process on that node, then it
+             * must be part of the corosync membership
+             */
+            crm_warn("Receiving messages from a node we think is dead: "
+                     "%s[%d]", peer->uname, peer->id);
+            crm_update_peer_proc(__func__, peer, crm_proc_cpg,
+                                 ONLINESTATUS);
+        }
+        crmd_ha_msg_filter(xml);
+        free_xml(xml);
+    } else {
+        crm_err("Invalid message class (%d): %.100s", kind, data);
+    }
+    free(data);
+}
+
+static gboolean
+crmd_quorum_callback(unsigned long long seq, gboolean quorate)
+{
+    crm_update_quorum(quorate, FALSE);
+    post_cache_update(seq);
+    return TRUE;
+}
+
+static void
+crmd_cs_destroy(gpointer user_data)
+{
+    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
+        crm_crit("Lost connection to cluster layer, shutting down");
+        crmd_exit(CRM_EX_DISCONNECT);
+
+    } else {
+        crm_info("Corosync connection closed");
+    }
+}
+
+/*!
+ * \brief Handle a Corosync notification of a CPG configuration change
+ *
+ * \param[in] handle               CPG connection
+ * \param[in] cpg_name             CPG group name
+ * \param[in] member_list          List of current CPG members
+ * \param[in] member_list_entries  Number of entries in \p member_list
+ * \param[in] left_list            List of CPG members that left
+ * \param[in] left_list_entries    Number of entries in \p left_list
+ * \param[in] joined_list          List of CPG members that joined
+ * \param[in] joined_list_entries  Number of entries in \p joined_list
+ */
+static void
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
+                        const struct cpg_address *member_list,
+                        size_t member_list_entries,
+                        const struct cpg_address *left_list,
+                        size_t left_list_entries,
+                        const struct cpg_address *joined_list,
+                        size_t joined_list_entries)
+{
+    /* When nodes leave CPG, the DC clears their transient node attributes.
+     *
+     * However if there is no DC, or the DC is among the nodes that left, each
+     * remaining node needs to do the clearing, to ensure it gets done.
+     * Otherwise, the attributes would persist when the nodes rejoin, which
+     * could have serious consequences for unfencing, agents that use
+     * attributes for internal logic, etc.
+     *
+     * Here, we set a global boolean if the DC is among the nodes that left,
+     * for use by the peer callback.
+     */
+    if (controld_globals.dc_name != NULL) {
+        crm_node_t *peer = NULL;
+
+        peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name);
+        if (peer != NULL) {
+            for (int i = 0; i < left_list_entries; ++i) {
+                if (left_list[i].nodeid == peer->id) {
+                    controld_set_global_flags(controld_dc_left);
+                    break;
+                }
+            }
+        }
+    }
+
+    // Process the change normally, which will call the peer callback as needed
+    pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
+                        left_list, left_list_entries,
+                        joined_list, joined_list_entries);
+
+    controld_clear_global_flags(controld_dc_left);
+}
+
+extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
+
+gboolean
+crm_connect_corosync(crm_cluster_t * cluster)
+{
+    if (is_corosync_cluster()) {
+        crm_set_status_callback(&peer_update_callback);
+        cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
+        cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
+        cluster->destroy = crmd_cs_destroy;
+
+        if (crm_cluster_connect(cluster)) {
+            pcmk__corosync_quorum_connect(crmd_quorum_callback,
+                                          crmd_cs_destroy);
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+#endif
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
new file mode 100644
index 0000000..5f33d5b
--- /dev/null
+++ b/daemons/controld/controld_election.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster/internal.h>
+#include <crm/cluster/election_internal.h>
+#include <crm/crm.h>
+
+#include <pacemaker-controld.h>
+
+static election_t *fsa_election = NULL;
+
+static gboolean
+election_win_cb(gpointer data)
+{
+    register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
+    return FALSE;
+}
+
+void
+controld_election_init(const char *uname)
+{
+    fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
+}
+
+/*!
+ * \internal
+ * \brief Configure election options based on the CIB
+ *
+ * \param[in,out] options  Name/value pairs for configured options
+ */
+void
+controld_configure_election(GHashTable *options)
+{
+    const char *value = NULL;
+
+    value = g_hash_table_lookup(options, XML_CONFIG_ATTR_ELECTION_FAIL);
+    election_timeout_set_period(fsa_election, crm_parse_interval_spec(value));
+}
+
+void
+controld_remove_voter(const char *uname)
+{
+    election_remove(fsa_election, uname);
+
+    if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) {
+        /* Clear any election dampening in effect. Otherwise, if the lost DC
+         * had just won, an immediate new election could fizzle out with no
+         * new DC.
+         */
+        election_clear_dampening(fsa_election);
+    }
+}
+
+void
+controld_election_fini(void)
+{
+    election_fini(fsa_election);
+    fsa_election = NULL;
+}
+
+void
+controld_stop_current_election_timeout(void)
+{
+    election_timeout_stop(fsa_election);
+}
+
+/* A_ELECTION_VOTE */
+void
+do_election_vote(long long action,
+                 enum crmd_fsa_cause cause,
+                 enum crmd_fsa_state cur_state,
+                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    gboolean not_voting = FALSE;
+
+    /* don't vote if we're in one of these states or wanting to shut down */
+    switch (cur_state) {
+        case S_STARTING:
+        case S_RECOVERY:
+        case S_STOPPING:
+        case S_TERMINATE:
+            crm_warn("Not voting in election, we're in state %s",
+                     fsa_state2string(cur_state));
+            not_voting = TRUE;
+            break;
+        case S_ELECTION:
+        case S_INTEGRATION:
+        case S_RELEASE_DC:
+            break;
+        default:
+            crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
+            break;
+    }
+
+    if (not_voting == FALSE) {
+        if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
+            not_voting = TRUE;
+        }
+    }
+
+    if (not_voting) {
+        if (AM_I_DC) {
+            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+
+        } else {
+            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+        }
+        return;
+    }
+
+    election_vote(fsa_election);
+    return;
+}
+
+void
+do_election_check(long long action,
+                  enum crmd_fsa_cause cause,
+                  enum crmd_fsa_state cur_state,
+                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    if (controld_globals.fsa_state == S_ELECTION) {
+        election_check(fsa_election);
+    } else {
+        crm_debug("Ignoring election check because we are not in an election");
+    }
+}
+
+/* A_ELECTION_COUNT */
+void
+do_election_count_vote(long long action,
+                       enum crmd_fsa_cause cause,
+                       enum crmd_fsa_state cur_state,
+                       enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    enum election_result rc = 0;
+    ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
+
+    if (crm_peer_cache == NULL) {
+        if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+            crm_err("Internal error, no peer cache");
+        }
+        return;
+    }
+
+    rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
+    switch (rc) {
+        case election_start:
+            election_reset(fsa_election);
+            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+            break;
+
+        case election_lost:
+            update_dc(NULL);
+
+            if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) {
+                cib_t *cib_conn = controld_globals.cib_conn;
+
+                register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
+                cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
+
+            } else if (cur_state != S_STARTING) {
+                register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
+            }
+            break;
+
+        default:
+            crm_trace("Election message resulted in state %d", rc);
+    }
+}
+
+static void
+feature_update_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+                        void *user_data)
+{
+    if (rc != pcmk_ok) {
+        fsa_data_t *msg_data = NULL;
+
+        crm_notice("Feature update failed: %s " CRM_XS " rc=%d",
+                   pcmk_strerror(rc), rc);
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+    }
+}
+
+/*!
+ * \internal + * \brief Update a node attribute in the CIB during a DC takeover + * + * \param[in] name Name of attribute to update + * \param[in] value New attribute value + */ +#define dc_takeover_update_attr(name, value) do { \ + cib__update_node_attr(controld_globals.logger_out, \ + controld_globals.cib_conn, cib_none, \ + XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \ + name, value, NULL, NULL); \ + } while (0) + +/* A_DC_TAKEOVER */ +void +do_dc_takeover(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *cib = NULL; + const char *cluster_type = name_for_cluster_type(get_cluster_type()); + pid_t watchdog = pcmk__locate_sbd(); + + crm_info("Taking over DC status for this partition"); + controld_set_fsa_input_flags(R_THE_DC); + execute_stonith_cleanup(); + + election_reset(fsa_election); + controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE); + + controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn, + cib_scope_local); + + cib = create_xml_node(NULL, XML_TAG_CIB); + crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback); + + dc_takeover_update_attr(XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); + dc_takeover_update_attr("dc-version", PACEMAKER_VERSION "-" BUILD_VERSION); + dc_takeover_update_attr("cluster-infrastructure", cluster_type); + +#if SUPPORT_COROSYNC + if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) { + char *cluster_name = pcmk__corosync_cluster_name(); + + if (cluster_name != NULL) { + dc_takeover_update_attr("cluster-name", cluster_name); + } + free(cluster_name); + } +#endif + + controld_trigger_config(); + free_xml(cib); +} + +/* A_DC_RELEASE */ +void +do_dc_release(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + if (action & A_DC_RELEASE) { + crm_debug("Releasing the role of DC"); + controld_clear_fsa_input_flags(R_THE_DC); + controld_expect_sched_reply(NULL); + + } else if (action & A_DC_RELEASED) { + crm_info("DC role released"); +#if 0 + if (are there errors) { + /* we can't stay up if not healthy */ + /* or perhaps I_ERROR and go to S_RECOVER? */ + result = I_SHUTDOWN; + } +#endif + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; + crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, + __func__); + /* Don't need a based response because controld will stop. */ + fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update); + free_xml(update); + } + register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); + + } else { + crm_err("Unknown DC action %s", fsa_action2string(action)); + } + + crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); + +} diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c new file mode 100644 index 0000000..0de399c --- /dev/null +++ b/daemons/controld/controld_execd.c @@ -0,0 +1,2433 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include +#include +#include + +#include +#include // lrmd_event_data_t, lrmd_rsc_info_t, etc. +#include +#include +#include +#include +#include + +#include +#include + +#define START_DELAY_THRESHOLD 5 * 60 * 1000 +#define MAX_LRM_REG_FAILS 30 + +struct delete_event_s { + int rc; + const char *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); +static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); +static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); + +static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, + const xmlNode *rsc_op, + const char *rsc_id, + const char *operation); +static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + xmlNode *msg, struct ra_metadata_s *md); + +static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); + +static void +lrm_connection_destroy(void) +{ + if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { + crm_crit("Connection to executor failed"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + controld_clear_fsa_input_flags(R_LRM_CONNECTED); + + } else { + crm_info("Disconnected from executor"); + } + +} + +static char * +make_stop_id(const char *rsc, int call_id) +{ + return crm_strdup_printf("%s:%d", rsc, call_id); +} + +static void +copy_instance_keys(gpointer key, gpointer value, gpointer user_data) +{ + if (strstr(key, CRM_META "_") == NULL) { + g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); + } +} + +static void +copy_meta_keys(gpointer key, gpointer value, gpointer user_data) +{ + if (strstr(key, CRM_META "_") != NULL) { + g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); + } +} + +/*! + * \internal + * \brief Remove a recurring operation from a resource's history + * + * \param[in,out] history Resource history to modify + * \param[in] op Operation to remove + * + * \return TRUE if the operation was found and removed, FALSE otherwise + */ +static gboolean +history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) +{ + GList *iter; + + for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { + lrmd_event_data_t *existing = iter->data; + + if ((op->interval_ms == existing->interval_ms) + && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) + && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { + + history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); + lrmd_free_event(existing); + return TRUE; + } + } + return FALSE; +} + +/*! + * \internal + * \brief Free all recurring operations in resource history + * + * \param[in,out] history Resource history to modify + */ +static void +history_free_recurring_ops(rsc_history_t *history) +{ + GList *iter; + + for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { + lrmd_free_event(iter->data); + } + g_list_free(history->recurring_op_list); + history->recurring_op_list = NULL; +} + +/*! 
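+ * (history_free() matches the GDestroyNotify signature so it can serve as
+ * the value-destroy function of a resource history table; a sketch, with
+ * the table constructor assumed rather than shown here:
+ * \code
+ * lrm_state->resource_history = pcmk__strkey_table(NULL, history_free);
+ * \endcode
+ * The key is not freed separately because it aliases entry->id.)
+ *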
+ * \internal + * \brief Free resource history + * + * \param[in,out] history Resource history to free + */ +void +history_free(gpointer data) +{ + rsc_history_t *history = (rsc_history_t*)data; + + if (history->stop_params) { + g_hash_table_destroy(history->stop_params); + } + + /* Don't need to free history->rsc.id because it's set to history->id */ + free(history->rsc.type); + free(history->rsc.standard); + free(history->rsc.provider); + + lrmd_free_event(history->failed); + lrmd_free_event(history->last); + free(history->id); + history_free_recurring_ops(history); + free(history); +} + +static void +update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) +{ + int target_rc = 0; + rsc_history_t *entry = NULL; + + if (op->rsc_deleted) { + crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); + controld_delete_resource_history(op->rsc_id, lrm_state->node_name, + NULL, crmd_cib_smart_opt()); + return; + } + + if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) { + return; + } + + crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); + + entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); + if (entry == NULL && rsc) { + entry = calloc(1, sizeof(rsc_history_t)); + entry->id = strdup(op->rsc_id); + g_hash_table_insert(lrm_state->resource_history, entry->id, entry); + + entry->rsc.id = entry->id; + entry->rsc.type = strdup(rsc->type); + entry->rsc.standard = strdup(rsc->standard); + pcmk__str_update(&entry->rsc.provider, rsc->provider); + + } else if (entry == NULL) { + crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); + return; + } + + entry->last_callid = op->call_id; + target_rc = rsc_op_expected_rc(op); + if (op->op_status == PCMK_EXEC_CANCELLED) { + if (op->interval_ms > 0) { + crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, + op->rsc_id, op->op_type, op->interval_ms); + history_remove_recurring_op(entry, op); + return; + } else { + crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", + op->rsc_id, op->op_type, op->interval_ms, op->rc, + op->op_status); + } + + } else if (did_rsc_op_fail(op, target_rc)) { + /* Store failed monitors here, otherwise the block below will cause them + * to be forgotten when a stop happens. + */ + if (entry->failed) { + lrmd_free_event(entry->failed); + } + entry->failed = lrmd_copy_event(op); + + } else if (op->interval_ms == 0) { + if (entry->last) { + lrmd_free_event(entry->last); + } + entry->last = lrmd_copy_event(op); + + if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START, + CRMD_ACTION_RELOAD, + CRMD_ACTION_RELOAD_AGENT, + CRMD_ACTION_STATUS, NULL)) { + if (entry->stop_params) { + g_hash_table_destroy(entry->stop_params); + } + entry->stop_params = pcmk__strkey_table(free, free); + + g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); + } + } + + if (op->interval_ms > 0) { + /* Ensure there are no duplicates */ + history_remove_recurring_op(entry, op); + + crm_trace("Adding recurring op: " PCMK__OP_FMT, + op->rsc_id, op->op_type, op->interval_ms); + entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); + + } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) { + crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, + g_list_length(entry->recurring_op_list), op->rsc_id, + op->op_type, op->interval_ms); + history_free_recurring_ops(entry); + } +} + +/*! 
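+ * (Typical use, as in do_lrm_cancel() below:
+ * \code
+ * send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
+ *                  from_host, from_sys);
+ * \endcode
+ * builds a synthetic OK result and routes it straight back to the
+ * requester rather than waiting for an executor event.)
+ *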
+ * \internal + * \brief Send a direct OK ack for a resource task + * + * \param[in] lrm_state LRM connection + * \param[in] input Input message being ack'ed + * \param[in] rsc_id ID of affected resource + * \param[in] rsc Affected resource (if available) + * \param[in] task Operation task being ack'ed + * \param[in] ack_host Name of host to send ack to + * \param[in] ack_sys IPC system name to ack + */ +static void +send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, + const char *rsc_id, const lrmd_rsc_info_t *rsc, + const char *task, const char *ack_host, const char *ack_sys) +{ + lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); + + lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); + lrmd_free_event(op); +} + +static inline const char * +op_node_name(lrmd_event_data_t *op) +{ + return pcmk__s(op->remote_nodename, controld_globals.our_nodename); +} + +void +lrm_op_callback(lrmd_event_data_t * op) +{ + CRM_CHECK(op != NULL, return); + switch (op->type) { + case lrmd_event_disconnect: + if (op->remote_nodename == NULL) { + /* If this is the local executor IPC connection, set the right + * bits in the controller when the connection goes down. + */ + lrm_connection_destroy(); + } + break; + + case lrmd_event_exec_complete: + { + lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); + + CRM_ASSERT(lrm_state != NULL); + process_lrm_event(lrm_state, op, NULL, NULL); + } + break; + + default: + break; + } +} + +static void +try_local_executor_connect(long long action, fsa_data_t *msg_data, + lrm_state_t *lrm_state) +{ + int rc = pcmk_rc_ok; + + crm_debug("Connecting to the local executor"); + + // If we can connect, great + rc = controld_connect_local_executor(lrm_state); + if (rc == pcmk_rc_ok) { + controld_set_fsa_input_flags(R_LRM_CONNECTED); + crm_info("Connection to the local executor established"); + return; + } + + // Otherwise, if we can try again, set a timer to do so + if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { + crm_warn("Failed to connect to the local executor %d time%s " + "(%d max): %s", lrm_state->num_lrm_register_fails, + pcmk__plural_s(lrm_state->num_lrm_register_fails), + MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); + controld_start_wait_timer(); + crmd_fsa_stall(FALSE); + return; + } + + // Otherwise give up + crm_err("Failed to connect to the executor the max allowed " + "%d time%s: %s", lrm_state->num_lrm_register_fails, + pcmk__plural_s(lrm_state->num_lrm_register_fails), + pcmk_rc_str(rc)); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); +} + +/* A_LRM_CONNECT */ +void +do_lrm_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + /* This only pertains to local executor connections. Remote connections are + * handled as resources within the scheduler. Connecting and disconnecting + * from remote executor instances is handled differently. 
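+ *
+ * (See try_local_executor_connect() above: a failed local connection is
+ * retried on a timer up to MAX_LRM_REG_FAILS times before I_ERROR is
+ * registered and the controller gives up.)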
+ */ + + lrm_state_t *lrm_state = NULL; + + if (controld_globals.our_nodename == NULL) { + return; /* Nothing to do */ + } + lrm_state = lrm_state_find_or_create(controld_globals.our_nodename); + if (lrm_state == NULL) { + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + return; + } + + if (action & A_LRM_DISCONNECT) { + if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { + if (action == A_LRM_DISCONNECT) { + crmd_fsa_stall(FALSE); + return; + } + } + + controld_clear_fsa_input_flags(R_LRM_CONNECTED); + crm_info("Disconnecting from the executor"); + lrm_state_disconnect(lrm_state); + lrm_state_reset_tables(lrm_state, FALSE); + crm_notice("Disconnected from the executor"); + } + + if (action & A_LRM_CONNECT) { + try_local_executor_connect(action, msg_data, lrm_state); + } + + if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { + crm_err("Unexpected action %s in %s", fsa_action2string(action), + __func__); + } +} + +static gboolean +lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) +{ + int counter = 0; + gboolean rc = TRUE; + const char *when = "lrm disconnect"; + + GHashTableIter gIter; + const char *key = NULL; + rsc_history_t *entry = NULL; + active_op_t *pending = NULL; + + crm_debug("Checking for active resources before exit"); + + if (cur_state == S_TERMINATE) { + log_level = LOG_ERR; + when = "shutdown"; + + } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + when = "shutdown... waiting"; + } + + if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { + guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, + stop_recurring_actions, + lrm_state); + guint nremaining = g_hash_table_size(lrm_state->active_ops); + + if (removed || nremaining) { + crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", + removed, pcmk__plural_s(removed), when, nremaining); + } + } + + if (lrm_state->active_ops != NULL) { + g_hash_table_iter_init(&gIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { + /* Ignore recurring actions in the shutdown calculations */ + if (pending->interval_ms == 0) { + counter++; + } + } + } + + if (counter > 0) { + do_crm_log(log_level, "%d pending executor operation%s at %s", + counter, pcmk__plural_s(counter), when); + + if ((cur_state == S_TERMINATE) + || !pcmk_is_set(controld_globals.fsa_input_register, + R_SENT_RSC_STOP)) { + g_hash_table_iter_init(&gIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { + do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); + } + + } else { + rc = FALSE; + } + return rc; + } + + if (lrm_state->resource_history == NULL) { + return rc; + } + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + /* At this point we're not waiting, we're just shutting down */ + when = "shutdown"; + } + + counter = 0; + g_hash_table_iter_init(&gIter, lrm_state->resource_history); + while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { + if (is_rsc_active(lrm_state, entry->id) == FALSE) { + continue; + } + + counter++; + if (log_level == LOG_ERR) { + crm_info("Found %s active at %s", entry->id, when); + } else { + crm_trace("Found %s active at %s", entry->id, when); + } + if (lrm_state->active_ops != NULL) { + GHashTableIter hIter; + + g_hash_table_iter_init(&hIter, lrm_state->active_ops); + while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { + if 
(pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { + crm_notice("%sction %s (%s) incomplete at %s", + pending->interval_ms == 0 ? "A" : "Recurring a", + key, pending->op_key, when); + } + } + } + } + + if (counter) { + crm_err("%d resource%s active at %s", + counter, (counter == 1)? " was" : "s were", when); + } + + return rc; +} + +static gboolean +is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) +{ + rsc_history_t *entry = NULL; + + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + if (entry == NULL || entry->last == NULL) { + return FALSE; + } + + crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, + entry->last->interval_ms, entry->last->rc); + if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) { + return FALSE; + + } else if (entry->last->rc == PCMK_OCF_OK + && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { + // A stricter check is too complex ... leave that to the scheduler + return FALSE; + + } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { + return FALSE; + + } else if ((entry->last->interval_ms == 0) + && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { + /* Badly configured resources can't be reliably stopped */ + return FALSE; + } + + return TRUE; +} + +static gboolean +build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) +{ + GHashTableIter iter; + rsc_history_t *entry = NULL; + + g_hash_table_iter_init(&iter, lrm_state->resource_history); + while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { + + GList *gIter = NULL; + xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); + + crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); + crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); + + if (entry->last && entry->last->params) { + const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); + if (container) { + crm_trace("Resource %s is a part of container resource %s", entry->id, container); + crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); + } + } + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, + lrm_state->node_name); + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, + lrm_state->node_name); + for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { + controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, + lrm_state->node_name); + } + } + + return FALSE; +} + +xmlNode * +controld_query_executor_state(void) +{ + xmlNode *xml_state = NULL; + xmlNode *xml_data = NULL; + xmlNode *rsc_list = NULL; + crm_node_t *peer = NULL; + lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename); + + if (!lrm_state) { + crm_err("Could not find executor state for node %s", + controld_globals.our_nodename); + return NULL; + } + + peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, + node_update_cluster|node_update_peer, + NULL, __func__); + if (xml_state == NULL) { + return NULL; + } + + xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); + crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); + rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); + + /* Build a list of active (not always running) resources */ + build_active_RAs(lrm_state, 
rsc_list); + + crm_log_xml_trace(xml_state, "Current executor state"); + + return xml_state; +} + +/*! + * \internal + * \brief Map standard Pacemaker return code to operation status and OCF code + * + * \param[out] event Executor event whose status and return code should be set + * \param[in] rc Standard Pacemaker return code + */ +void +controld_rc2event(lrmd_event_data_t *event, int rc) +{ + /* This is called for cleanup requests from controller peers/clients, not + * for resource actions, so no exit reason is needed. + */ + switch (rc) { + case pcmk_rc_ok: + lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + break; + case EACCES: + lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, + PCMK_EXEC_ERROR, NULL); + break; + default: + lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + NULL); + break; + } +} + +/*! + * \internal + * \brief Trigger a new transition after CIB status was deleted + * + * If a CIB status delete was not expected (as part of the transition graph), + * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" + * cluster property. + * + * \param[in] from_sys IPC name that requested the delete + * \param[in] rsc_id Resource whose status was deleted (for logging only) + */ +void +controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) +{ + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { + char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); + + crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_none, + XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, + "last-lrm-refresh", now_s, NULL, NULL); + free(now_s); + } +} + +static void +notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) +{ + lrmd_event_data_t *op = NULL; + const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); + const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); + + crm_info("Notifying %s on %s that %s was%s deleted", + from_sys, (from_host? from_host : "localhost"), rsc_id, + ((rc == pcmk_ok)? 
"" : " not")); + op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); + controld_rc2event(op, pcmk_legacy2rc(rc)); + controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); + lrmd_free_event(op); + controld_trigger_delete_refresh(from_sys, rsc_id); +} + +static gboolean +lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) +{ + struct delete_event_s *event = user_data; + struct pending_deletion_op_s *op = value; + + if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { + notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); + return TRUE; + } + return FALSE; +} + +static gboolean +lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) +{ + const char *rsc = user_data; + active_op_t *pending = value; + + if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { + crm_info("Removing op %s:%d for deleted resource %s", + pending->op_key, pending->call_id, rsc); + return TRUE; + } + return FALSE; +} + +static void +delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, + const char *rsc_id, GHashTableIter *rsc_iter, int rc, + const char *user_name, bool from_cib) +{ + struct delete_event_s event; + + CRM_CHECK(rsc_id != NULL, return); + + if (rc == pcmk_ok) { + char *rsc_id_copy = strdup(rsc_id); + + if (rsc_iter) { + g_hash_table_iter_remove(rsc_iter); + } else { + g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); + } + + if (from_cib) { + controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, + user_name, crmd_cib_smart_opt()); + } + g_hash_table_foreach_remove(lrm_state->active_ops, + lrm_remove_deleted_op, rsc_id_copy); + free(rsc_id_copy); + } + + if (input) { + notify_deleted(lrm_state, input, rsc_id, rc); + } + + event.rc = rc; + event.rsc = rsc_id; + event.lrm_state = lrm_state; + g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); +} + +static inline gboolean +last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) +{ + if (entry == NULL) { + return FALSE; + } + if (op == NULL) { + return TRUE; + } + return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) + && (interval_ms == entry->failed->interval_ms)); +} + +/*! + * \internal + * \brief Clear a resource's last failure + * + * Erase a resource's last failure on a particular node from both the + * LRM resource history in the CIB, and the resource history remembered + * for the LRM state. 
+ * + * \param[in] rsc_id Resource name + * \param[in] node_name Node name + * \param[in] operation If specified, only clear if matching this operation + * \param[in] interval_ms If operation is specified, it has this interval + */ +void +lrm_clear_last_failure(const char *rsc_id, const char *node_name, + const char *operation, guint interval_ms) +{ + lrm_state_t *lrm_state = lrm_state_find(node_name); + + if (lrm_state == NULL) { + return; + } + if (lrm_state->resource_history != NULL) { + rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, + rsc_id); + + if (last_failed_matches_op(entry, operation, interval_ms)) { + lrmd_free_event(entry->failed); + entry->failed = NULL; + } + } +} + +/* Returns: gboolean - cancellation is in progress */ +static gboolean +cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) +{ + int rc = pcmk_ok; + char *local_key = NULL; + active_op_t *pending = NULL; + + CRM_CHECK(op != 0, return FALSE); + CRM_CHECK(rsc_id != NULL, return FALSE); + if (key == NULL) { + local_key = make_stop_id(rsc_id, op); + key = local_key; + } + pending = g_hash_table_lookup(lrm_state->active_ops, key); + + if (pending) { + if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { + controld_set_active_op_flags(pending, active_op_remove); + crm_debug("Scheduling %s for removal", key); + } + + if (pcmk_is_set(pending->flags, active_op_cancelled)) { + crm_debug("Operation %s already cancelled", key); + free(local_key); + return FALSE; + } + controld_set_active_op_flags(pending, active_op_cancelled); + + } else { + crm_info("No pending op found for %s", key); + free(local_key); + return FALSE; + } + + crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); + rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, + pending->interval_ms); + if (rc == pcmk_ok) { + crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); + free(local_key); + return TRUE; + } + + crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); + /* The caller needs to make sure the entry is + * removed from the active operations list + * + * Usually by returning TRUE inside the worker function + * supplied to g_hash_table_foreach_remove() + * + * Not removing the entry from active operations will block + * the node from shutting down + */ + free(local_key); + return FALSE; +} + +struct cancel_data { + gboolean done; + gboolean remove; + const char *key; + lrmd_rsc_info_t *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean +cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + struct cancel_data *data = user_data; + active_op_t *op = value; + + if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { + data->done = TRUE; + remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); + } + return remove; +} + +static gboolean +cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) +{ + guint removed = 0; + struct cancel_data data; + + CRM_CHECK(rsc != NULL, return FALSE); + CRM_CHECK(key != NULL, return FALSE); + + data.key = key; + data.rsc = rsc; + data.done = FALSE; + data.remove = remove; + data.lrm_state = lrm_state; + + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + cancel_action_by_key, &data); + crm_trace("Removed %u op cache entries, new size: %u", + removed, g_hash_table_size(lrm_state->active_ops)); + return data.done; +} + +/*! 
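+ * (Note on the cancel helpers above: cancel_op_key() drives
+ * cancel_action_by_key() via g_hash_table_foreach_remove(), so returning
+ * TRUE there is what actually drops the entry from active_ops, and that
+ * happens only when cancel_op() reports nothing is left in flight.)
+ *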
+ * \internal + * \brief Retrieve resource information from LRM + * + * \param[in,out] lrm_state Executor connection state to use + * \param[in] rsc_xml XML containing resource configuration + * \param[in] do_create If true, register resource if not already + * \param[out] rsc_info Where to store information obtained from executor + * + * \retval pcmk_ok Success (and rsc_info holds newly allocated result) + * \retval -EINVAL Required information is missing from arguments + * \retval -ENOTCONN No active connection to LRM + * \retval -ENODEV Resource not found + * \retval -errno Error communicating with executor when registering resource + * + * \note Caller is responsible for freeing result on success. + */ +static int +get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, + gboolean do_create, lrmd_rsc_info_t **rsc_info) +{ + const char *id = ID(rsc_xml); + + CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); + CRM_CHECK(id, return -EINVAL); + + if (lrm_state_is_connected(lrm_state) == FALSE) { + return -ENOTCONN; + } + + crm_trace("Retrieving resource information for %s from the executor", id); + *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); + + // If resource isn't known by ID, try clone name, if provided + if (!*rsc_info) { + const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); + + if (long_id) { + *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); + } + } + + if ((*rsc_info == NULL) && do_create) { + const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); + int rc; + + crm_trace("Registering resource %s with the executor", id); + rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, + lrmd_opt_drop_recurring); + if (rc != pcmk_ok) { + fsa_data_t *msg_data = NULL; + + crm_err("Could not register resource %s with the executor on %s: %s " + CRM_XS " rc=%d", + id, lrm_state->node_name, pcmk_strerror(rc), rc); + + /* Register this as an internal error if this involves the local + * executor. Otherwise, we're likely dealing with an unresponsive + * remote node, which is not an FSA failure. + */ + if (lrm_state_is_local(lrm_state) == TRUE) { + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + } + return rc; + } + + *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); + } + return *rsc_info? pcmk_ok : -ENODEV; +} + +static void +delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, + GHashTableIter *iter, const char *sys, const char *user, + ha_msg_input_t *request, bool unregister, bool from_cib) +{ + int rc = pcmk_ok; + + crm_info("Removing resource %s from executor for %s%s%s", + id, sys, (user? " as " : ""), (user? user : "")); + + if (rsc && unregister) { + rc = lrm_state_unregister_rsc(lrm_state, id, 0); + } + + if (rc == pcmk_ok) { + crm_trace("Resource %s deleted from executor", id); + } else if (rc == -EINPROGRESS) { + crm_info("Deletion of resource '%s' from executor is pending", id); + if (request) { + struct pending_deletion_op_s *op = NULL; + char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); + + op = calloc(1, sizeof(struct pending_deletion_op_s)); + op->rsc = strdup(rsc->id); + op->input = copy_ha_msg_input(request); + g_hash_table_insert(lrm_state->deletion_ops, ref, op); + } + return; + } else { + crm_warn("Could not delete '%s' from executor for %s%s%s: %s " + CRM_XS " rc=%d", id, sys, (user? " as " : ""), + (user? 
user : ""), pcmk_strerror(rc), rc); + } + + delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); +} + +static int +get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) +{ + int call_id = 999999999; + rsc_history_t *entry = NULL; + + if(lrm_state) { + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + } + + /* Make sure the call id is greater than the last successful operation, + * otherwise the failure will not result in a possible recovery of the resource + * as it could appear the failure occurred before the successful start */ + if (entry) { + call_id = entry->last_callid + 1; + } + + if (call_id < 0) { + call_id = 1; + } + return call_id; +} + +static void +fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, + enum ocf_exitcode op_exitcode, const char *exit_reason) +{ + op->call_id = get_fake_call_id(lrm_state, op->rsc_id); + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + lrmd__set_result(op, op_exitcode, op_status, exit_reason); +} + +static void +force_reprobe(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node, bool reprobe_all_nodes) +{ + GHashTableIter gIter; + rsc_history_t *entry = NULL; + + crm_info("Clearing resource history on node %s", lrm_state->node_name); + g_hash_table_iter_init(&gIter, lrm_state->resource_history); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + /* only unregister the resource during a reprobe if it is not a remote connection + * resource. otherwise unregistering the connection will terminate remote-node + * membership */ + bool unregister = true; + + if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { + unregister = false; + + if (reprobe_all_nodes) { + lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); + + if (remote_lrm_state != NULL) { + /* If reprobing all nodes, be sure to reprobe the remote + * node before clearing its connection resource + */ + force_reprobe(remote_lrm_state, from_sys, from_host, + user_name, TRUE, reprobe_all_nodes); + } + } + } + + /* Don't delete from the CIB, since we'll delete the whole node's LRM + * state from the CIB soon + */ + delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, + user_name, NULL, unregister, false); + } + + /* Now delete the copy in the CIB */ + controld_delete_node_state(lrm_state->node_name, controld_section_lrm, + cib_scope_local); + + // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) + update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); +} + +/*! + * \internal + * \brief Fail a requested action without actually executing it + * + * For an action that can't be executed, process it similarly to an actual + * execution result, with specified error status (except for notify actions, + * which will always be treated as successful). 
+ * + * \param[in,out] lrm_state Executor connection that action is for + * \param[in] action Action XML from request + * \param[in] rc Desired return code to use + * \param[in] op_status Desired operation status to use + * \param[in] exit_reason Human-friendly detail, if error + */ +static void +synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, + int op_status, enum ocf_exitcode rc, + const char *exit_reason) +{ + lrmd_event_data_t *op = NULL; + const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); + const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); + xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); + + if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { + /* @TODO Should we do something else, like direct ack? */ + crm_info("Can't fake %s failure (%d) on %s without resource configuration", + crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, + target_node); + return; + + } else if(operation == NULL) { + /* This probably came from crm_resource -C, nothing to do */ + crm_info("Can't fake %s failure (%d) on %s without operation", + ID(xml_rsc), rc, target_node); + return; + } + + op = construct_op(lrm_state, action, ID(xml_rsc), operation); + + if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail + fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); + } else { + fake_op_status(lrm_state, op, op_status, rc, exit_reason); + } + + crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", + op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); + + // Process the result as if it came from the LRM + process_lrm_event(lrm_state, op, NULL, action); + lrmd_free_event(op); +} + +/*! + * \internal + * \brief Get target of an LRM operation (replacing \p NULL with local node + * name) + * + * \param[in] xml LRM operation data XML + * + * \return LRM operation target node name (local node or Pacemaker Remote node) + */ +static const char * +lrm_op_target(const xmlNode *xml) +{ + const char *target = NULL; + + if (xml) { + target = crm_element_value(xml, XML_LRM_ATTR_TARGET); + } + if (target == NULL) { + target = controld_globals.our_nodename; + } + return target; +} + +static void +fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + const char *from_host, const char *from_sys) +{ + lrmd_event_data_t *op = NULL; + lrmd_rsc_info_t *rsc = NULL; + xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); + + CRM_CHECK(xml_rsc != NULL, return); + + /* The executor simply executes operations and reports the results, without + * any concept of success or failure, so to fail a resource, we must fake + * what a failure looks like. + * + * To do this, we create a fake executor operation event for the resource, + * and pass that event to the executor client callback so it will be + * processed as if it came from the executor. 
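+ *
+ * (The same fake-event technique covers the error paths below; only the
+ * status and exit codes differ, e.g. PCMK_OCF_INSUFFICIENT_PRIV for an
+ * unprivileged requester.)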
+ */ + op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); + + free((char*) op->user_data); + op->user_data = NULL; + op->interval_ms = 0; + + if (user_name && !pcmk__is_privileged(user_name)) { + crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); + fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, + PCMK_OCF_INSUFFICIENT_PRIV, + "Unprivileged user cannot fail resources"); + controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); + return; + } + + + if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { + crm_info("Failing resource %s...", rsc->id); + fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, + "Simulated failure"); + process_lrm_event(lrm_state, op, NULL, xml); + op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded + lrmd_free_rsc_info(rsc); + + } else { + crm_info("Cannot find/create resource in order to fail it..."); + crm_log_xml_warn(xml, "bad input"); + fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, + "Cannot fail unknown resource"); + } + + controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); +} + +static void +handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node, bool reprobe_all_nodes) +{ + crm_notice("Forcing the status of all resources to be redetected"); + force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, + reprobe_all_nodes); + + if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { + + xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, + from_sys, CRM_SYSTEM_LRMD, + controld_globals.our_uuid); + + crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + free_xml(reply); + } +} + +static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) +{ + char *op_key = NULL; + char *meta_key = NULL; + int call = 0; + const char *call_id = NULL; + const char *op_task = NULL; + guint interval_ms = 0; + gboolean in_progress = FALSE; + xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); + + CRM_CHECK(params != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_TASK); + op_task = crm_element_value(params, meta_key); + free(meta_key); + CRM_CHECK(op_task != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); + if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { + free(meta_key); + return FALSE; + } + free(meta_key); + + op_key = pcmk__op_key(rsc->id, op_task, interval_ms); + + meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); + call_id = crm_element_value(params, meta_key); + free(meta_key); + + crm_debug("Scheduler requested op %s (call=%s) be cancelled", + op_key, (call_id? 
call_id : "NA")); + pcmk__scan_min_int(call_id, &call, 0); + if (call == 0) { + // Normal case when the scheduler cancels a recurring op + in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); + + } else { + // Normal case when the scheduler cancels an orphan op + in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); + } + + // Acknowledge cancellation operation if for a remote connection resource + if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + char *op_id = make_stop_id(rsc->id, call); + + if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { + crm_info("Nothing known about operation %d for %s", call, op_key); + } + controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, + op_key, call); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + + /* needed at least for cancellation of a remote operation */ + if (lrm_state->active_ops != NULL) { + g_hash_table_remove(lrm_state->active_ops, op_id); + } + free(op_id); + + } else { + /* No ack is needed since abcdaa8, but peers with older versions + * in a rolling upgrade need one. We didn't bump the feature set + * at that commit, so we can only compare against the previous + * CRM version (3.0.8). If any peers have feature set 3.0.9 but + * not abcdaa8, they will time out waiting for the ack (no + * released versions of Pacemaker are affected). + */ + const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); + + if (compare_version(peer_version, "3.0.8") <= 0) { + crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", + op_key, from_host, peer_version); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + } + } + + free(op_key); + return TRUE; +} + +static void +do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, + bool crm_rsc_delete, const char *user_name) +{ + bool unregister = true; + int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, + user_name, + cib_dryrun|cib_sync_call); + + if (cib_rc != pcmk_rc_ok) { + lrmd_event_data_t *op = NULL; + + op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); + + /* These are resource clean-ups, not actions, so no exit reason is + * needed. + */ + lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); + controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); + lrmd_free_event(op); + return; + } + + if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + unregister = false; + } + + delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, + user_name, input, unregister, true); +} + +// User data for asynchronous metadata execution +struct metadata_cb_data { + lrmd_rsc_info_t *rsc; // Copy of resource information + xmlNode *input_xml; // Copy of FSA input XML +}; + +static struct metadata_cb_data * +new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) +{ + struct metadata_cb_data *data = NULL; + + data = calloc(1, sizeof(struct metadata_cb_data)); + CRM_ASSERT(data != NULL); + data->input_xml = copy_xml(input_xml); + data->rsc = lrmd_copy_rsc_info(rsc); + return data; +} + +static void +free_metadata_cb_data(struct metadata_cb_data *data) +{ + lrmd_free_rsc_info(data->rsc); + free_xml(data->input_xml); + free(data); +} + +/*! 
+ * \internal + * \brief Execute an action after metadata has been retrieved + * + * \param[in] pid Ignored + * \param[in] result Result of metadata action + * \param[in] user_data Metadata callback data + */ +static void +metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) +{ + struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; + + struct ra_metadata_s *md = NULL; + lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); + + if ((lrm_state != NULL) && pcmk__result_ok(result)) { + md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, + result->action_stdout); + } + do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); + free_metadata_cb_data(data); +} + +/* A_LRM_INVOKE */ +void +do_lrm_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + lrm_state_t *lrm_state = NULL; + const char *crm_op = NULL; + const char *from_sys = NULL; + const char *from_host = NULL; + const char *operation = NULL; + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *user_name = NULL; + const char *target_node = lrm_op_target(input->xml); + gboolean is_remote_node = FALSE; + bool crm_rsc_delete = FALSE; + + // Message routed to the local node is targeting a specific, non-local node + is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename, + pcmk__str_casei); + + lrm_state = lrm_state_find(target_node); + if ((lrm_state == NULL) && is_remote_node) { + crm_err("Failing action because local node has never had connection to remote node %s", + target_node); + synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, + "Local node has no connection to remote"); + return; + } + CRM_ASSERT(lrm_state != NULL); + + user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); + crm_op = crm_element_value(input->msg, F_CRM_TASK); + from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); + } + + if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + crm_rsc_delete = TRUE; // from crm_resource + } + operation = CRMD_ACTION_DELETE; + + } else if (input->xml != NULL) { + operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); + } + + CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); + + crm_trace("'%s' execution request from %s as %s user", + pcmk__s(crm_op, operation), + pcmk__s(from_sys, "unknown subsystem"), + pcmk__s(user_name, "current")); + + if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { + fail_lrm_resource(input->xml, lrm_state, user_name, from_host, + from_sys); + + } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) { + /* @COMPAT This can only be sent by crm_resource --refresh on a + * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely + * unlikely. It previously would cause the controller to re-write its + * resource history to the CIB. Just ignore it. 
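+ *
+ * (Newer clients refresh by deleting resource history, which the cluster
+ * then repopulates via probes, so nothing is lost by ignoring this.)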
+ */ + crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); + + // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op + } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) { + update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, + user_name, is_remote_node); + + } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) + || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { + const char *raw_target = NULL; + + if (input->xml != NULL) { + // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes + raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); + } + handle_reprobe_op(lrm_state, from_sys, from_host, user_name, + is_remote_node, (raw_target == NULL)); + + } else if (operation != NULL) { + lrmd_rsc_info_t *rsc = NULL; + xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); + gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE, + pcmk__str_none); + int rc; + + // We can't return anything meaningful without a resource ID + CRM_CHECK(xml_rsc && ID(xml_rsc), return); + + rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); + if (rc == -ENOTCONN) { + synthesize_lrmd_failure(lrm_state, input->xml, + PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, + "Not connected to remote executor"); + return; + + } else if ((rc < 0) && !create_rsc) { + /* Delete of malformed or nonexistent resource + * (deleting something that does not exist is a success) + */ + crm_notice("Not registering resource '%s' for a %s event " + CRM_XS " get-rc=%d (%s) transition-key=%s", + ID(xml_rsc), operation, + rc, pcmk_strerror(rc), ID(input->xml)); + delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, + user_name, true); + return; + + } else if (rc == -EINVAL) { + // Resource operation on malformed resource + crm_err("Invalid resource definition for %s", ID(xml_rsc)); + crm_log_xml_warn(input->msg, "invalid resource"); + synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, + PCMK_OCF_NOT_CONFIGURED, // fatal error + "Invalid resource definition"); + return; + + } else if (rc < 0) { + // Error communicating with the executor + crm_err("Could not register resource '%s' with executor: %s " + CRM_XS " rc=%d", + ID(xml_rsc), pcmk_strerror(rc), rc); + crm_log_xml_warn(input->msg, "failed registration"); + synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, + PCMK_OCF_INVALID_PARAM, // hard error + "Could not register resource with executor"); + return; + } + + if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) { + if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { + crm_log_xml_warn(input->xml, "Bad command"); + } + + } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) { + do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, + crm_rsc_delete, user_name); + + } else { + struct ra_metadata_s *md = NULL; + + /* Getting metadata from cache is OK except for start actions -- + * always refresh from the agent for those, in case the resource + * agent was updated. + * + * @TODO Only refresh metadata for starts if the agent actually + * changed (using something like inotify, or a hash or modification + * time of the agent executable). 
+ */ + if (strcmp(operation, CRMD_ACTION_START) != 0) { + md = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_cache); + } + + if ((md == NULL) && crm_op_needs_metadata(rsc->standard, + operation)) { + /* Most likely, we'll need the agent metadata to record the + * pending operation and the operation result. Get it now rather + * than wait until then, so the metadata action doesn't eat into + * the real action's timeout. + * + * @TODO Metadata is retrieved via direct execution of the + * agent, which has a couple of related issues: the executor + * should execute agents, not the controller; and metadata for + * Pacemaker Remote nodes should be collected on those nodes, + * not locally. + */ + struct metadata_cb_data *data = NULL; + + data = new_metadata_cb_data(rsc, input->xml); + crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type); + (void) lrmd__metadata_async(rsc, metadata_complete, + (void *) data); + } else { + do_lrm_rsc_op(lrm_state, rsc, input->xml, md); + } + } + + lrmd_free_rsc_info(rsc); + + } else { + crm_err("Invalid execution request: unknown command '%s' (bug?)", + crm_op); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +static lrmd_event_data_t * +construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, + const char *rsc_id, const char *operation) +{ + lrmd_event_data_t *op = NULL; + const char *op_delay = NULL; + const char *op_timeout = NULL; + GHashTable *params = NULL; + + xmlNode *primitive = NULL; + const char *class = NULL; + + const char *transition = NULL; + + CRM_ASSERT(rsc_id && operation); + + op = lrmd_new_event(rsc_id, operation, 0); + op->type = lrmd_event_exec_complete; + op->timeout = 0; + op->start_delay = 0; + lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); + + if (rsc_op == NULL) { + CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei)); + op->user_data = NULL; + /* the stop_all_resources() case + * by definition there is no DC (or they'd be shutting + * us down). + * So we should put our version here. 
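+ *
+ * (Concretely, the controller's own feature set is recorded as an
+ * operation parameter, exactly as done just below:
+ * \code
+ * g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
+ *                     strdup(CRM_FEATURE_SET));
+ * \endcode
+ * so consumers of the result still know which version produced it.)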
+ */ + op->params = pcmk__strkey_table(free, free); + + g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); + + crm_trace("Constructed %s op for %s", operation, rsc_id); + return op; + } + + params = xml2list(rsc_op); + g_hash_table_remove(params, CRM_META "_op_target_rc"); + + op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); + pcmk__scan_min_int(op_delay, &op->start_delay, 0); + + op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); + pcmk__scan_min_int(op_timeout, &op->timeout, 0); + + if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, + &(op->interval_ms)) != pcmk_rc_ok) { + op->interval_ms = 0; + } + + /* Use pcmk_monitor_timeout instead of meta timeout for stonith + recurring monitor, if set */ + primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); + class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); + + if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) + && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei) + && (op->interval_ms > 0)) { + + op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); + if (op_timeout != NULL) { + op->timeout = crm_get_msec(op_timeout); + } + } + + if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) { + op->params = params; + + } else { + rsc_history_t *entry = NULL; + + if (lrm_state) { + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + } + + /* If we do not have stop parameters cached, use + * whatever we are given */ + if (!entry || !entry->stop_params) { + op->params = params; + } else { + /* Copy the cached parameter list so that we stop the resource + * with the old attributes, not the new ones */ + op->params = pcmk__strkey_table(free, free); + + g_hash_table_foreach(params, copy_meta_keys, op->params); + g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); + g_hash_table_destroy(params); + params = NULL; + } + } + + /* sanity */ + if (op->timeout <= 0) { + op->timeout = op->interval_ms; + } + if (op->start_delay < 0) { + op->start_delay = 0; + } + + transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); + CRM_CHECK(transition != NULL, return op); + + op->user_data = strdup(transition); + + if (op->interval_ms != 0) { + if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) { + crm_err("Start and Stop actions cannot have an interval: %u", + op->interval_ms); + op->interval_ms = 0; + } + } + + crm_trace("Constructed %s op for %s: interval=%u", + operation, rsc_id, op->interval_ms); + + return op; +} + +/*! + * \internal + * \brief Send a (synthesized) event result + * + * Reply with a synthesized event result directly, as opposed to going through + * the executor. 
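+ *
+ * A typical direct ack to the transition engine on the local node, as
+ * used in do_lrm_rsc_op() below:
+ * \code
+ * controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
+ * \endcode
+ * (a NULL to_sys defaults to CRM_SYSTEM_TENGINE in the body).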
+ * + * \param[in] to_host Host to send result to + * \param[in] to_sys IPC name to send result (NULL for transition engine) + * \param[in] rsc Type information about resource the result is for + * \param[in,out] op Event with result to send + * \param[in] rsc_id ID of resource the result is for + */ +void +controld_ack_event_directly(const char *to_host, const char *to_sys, + const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, + const char *rsc_id) +{ + xmlNode *reply = NULL; + xmlNode *update, *iter; + crm_node_t *peer = NULL; + + CRM_CHECK(op != NULL, return); + if (op->rsc_id == NULL) { + CRM_ASSERT(rsc_id != NULL); + op->rsc_id = strdup(rsc_id); + } + if (to_sys == NULL) { + to_sys = CRM_SYSTEM_TENGINE; + } + + peer = crm_get_peer(0, controld_globals.our_nodename); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + + iter = create_xml_node(update, XML_CIB_TAG_LRM); + crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); + + crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); + + controld_add_resource_history_xml(iter, rsc, op, + controld_globals.our_nodename); + reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); + + crm_log_xml_trace(update, "[direct ACK]"); + + crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", + op->rsc_id, op->op_type, op->interval_ms, op->user_data, + crm_element_value(reply, XML_ATTR_REFERENCE)); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + + free_xml(update); + free_xml(reply); +} + +gboolean +verify_stopped(enum crmd_fsa_state cur_state, int log_level) +{ + gboolean res = TRUE; + GList *lrm_state_list = lrm_state_get_list(); + GList *state_entry; + + for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { + lrm_state_t *lrm_state = state_entry->data; + + if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { + /* keep iterating through all even when false is returned */ + res = FALSE; + } + } + + controld_set_fsa_input_flags(R_SENT_RSC_STOP); + g_list_free(lrm_state_list); lrm_state_list = NULL; + return res; +} + +struct stop_recurring_action_s { + lrmd_rsc_info_t *rsc; + lrm_state_t *lrm_state; +}; + +static gboolean +stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + struct stop_recurring_action_s *event = user_data; + active_op_t *op = value; + + if ((op->interval_ms != 0) + && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { + + crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); + remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); + } + + return remove; +} + +static gboolean +stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) +{ + gboolean remove = FALSE; + lrm_state_t *lrm_state = user_data; + active_op_t *op = value; + + if (op->interval_ms != 0) { + crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, + (const char *) key); + remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); + } + + return remove; +} + +/*! 
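+ * (For example, a migrate_to of a Pacemaker Remote connection resource
+ * returns false here, so its recurring monitor keeps running across the
+ * migration and a severed connection is still detected.)
+ *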
+ * \internal
+ * \brief Check whether recurring actions should be cancelled before an action
+ *
+ * \param[in] rsc_id       Resource that action is for
+ * \param[in] action       Action being performed
+ * \param[in] interval_ms  Operation interval of \p action (in milliseconds)
+ *
+ * \return true if recurring actions should be cancelled, otherwise false
+ */
+static bool
+should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
+{
+    if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
+        && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) {
+        /* Don't stop monitoring a migrating Pacemaker Remote connection
+         * resource until the entire migration has completed. We must detect if
+         * the connection is unexpectedly severed, even during a migration.
+         */
+        return false;
+    }
+
+    // Cancel recurring actions before changing resource state
+    return (interval_ms == 0)
+           && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY,
+                                NULL);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action should not be performed at this time
+ *
+ * \param[in] action  Action to be performed
+ *
+ * \return Readable description of why action should not be performed,
+ *         or NULL if it should be performed
+ */
+static const char *
+should_nack_action(const char *action)
+{
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
+        && pcmk__str_eq(action, RSC_START, pcmk__str_none)) {
+
+        register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+        return "Not attempting start due to shutdown in progress";
+    }
+
+    switch (controld_globals.fsa_state) {
+        case S_NOT_DC:
+        case S_POLICY_ENGINE:   // Recalculating
+        case S_TRANSITION_ENGINE:
+            break;
+        default:
+            if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) {
+                return "Controller cannot attempt actions at this time";
+            }
+            break;
+    }
+    return NULL;
+}
+
+static void
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
+              struct ra_metadata_s *md)
+{
+    int rc;
+    int call_id = 0;
+    char *op_id = NULL;
+    lrmd_event_data_t *op = NULL;
+    fsa_data_t *msg_data = NULL;
+    const char *transition = NULL;
+    const char *operation = NULL;
+    const char *nack_reason = NULL;
+
+    CRM_CHECK((rsc != NULL) && (msg != NULL), return);
+
+    operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
+    CRM_CHECK(!pcmk__str_empty(operation), return);
+
+    transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
+    if (pcmk__str_empty(transition)) {
+        crm_log_xml_err(msg, "Missing transition number");
+    }
+
+    if (lrm_state == NULL) {
+        // This shouldn't be possible, but provide a failsafe just in case
+        crm_err("Cannot execute %s of %s: No executor connection "
+                CRM_XS " transition_key=%s",
+                operation, rsc->id, pcmk__s(transition, ""));
+        synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
+                                PCMK_OCF_UNKNOWN_ERROR,
+                                "No executor connection");
+        return;
+    }
+
+    if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
+                         CRMD_ACTION_RELOAD_AGENT, NULL)) {
+        /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
+         * will schedule reload-agent actions only. In either case, we need
+         * to map that to whatever the resource agent actually supports.
+         * Default to the OCF 1.1 name.
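+         * (For example: if the agent's meta-data still advertises the legacy
+         * "reload" action, a scheduled reload-agent is executed as "reload";
+         * otherwise both are executed as "reload-agent".)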
+ */ + if ((md != NULL) + && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { + operation = CRMD_ACTION_RELOAD; + } else { + operation = CRMD_ACTION_RELOAD_AGENT; + } + } + + op = construct_op(lrm_state, msg, rsc->id, operation); + CRM_CHECK(op != NULL, return); + + if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { + guint removed = 0; + struct stop_recurring_action_s data; + + data.rsc = rsc; + data.lrm_state = lrm_state; + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + stop_recurring_action_by_rsc, + &data); + + if (removed) { + crm_debug("Stopped %u recurring operation%s in preparation for " + PCMK__OP_FMT, removed, pcmk__plural_s(removed), + rsc->id, operation, op->interval_ms); + } + } + + /* now do the op */ + crm_notice("Requesting local execution of %s operation for %s on %s " + CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, + crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, + pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); + + nack_reason = should_nack_action(operation); + if (nack_reason != NULL) { + crm_notice("Discarding attempt to perform action %s on %s in state %s " + "(shutdown=%s)", operation, rsc->id, + fsa_state2string(controld_globals.fsa_state), + pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN))); + + lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, + nack_reason); + controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + lrmd_free_event(op); + free(op_id); + return; + } + + controld_record_pending_op(lrm_state->node_name, rsc, op); + + op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); + + if (op->interval_ms > 0) { + /* cancel it so we can then restart it without conflict */ + cancel_op_key(lrm_state, rsc, op_id, FALSE); + } + + rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, + op->user_data, op->interval_ms, + op->timeout, op->start_delay, + op->params, &call_id); + if (rc == pcmk_rc_ok) { + /* record all operations so we can wait + * for them to complete during shutdown + */ + char *call_id_s = make_stop_id(rsc->id, call_id); + active_op_t *pending = NULL; + + pending = calloc(1, sizeof(active_op_t)); + crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); + + pending->call_id = call_id; + pending->interval_ms = op->interval_ms; + pending->op_type = strdup(operation); + pending->op_key = strdup(op_id); + pending->rsc_id = strdup(rsc->id); + pending->start_time = time(NULL); + pcmk__str_update(&pending->user_data, op->user_data); + if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &(pending->lock_time)) != pcmk_ok) { + pending->lock_time = 0; + } + g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); + + if ((op->interval_ms > 0) + && (op->start_delay > START_DELAY_THRESHOLD)) { + int target_rc = PCMK_OCF_OK; + + crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); + decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); + lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); + controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + } + + pending->params = op->params; + op->params = NULL; + + } else if (lrm_state_is_local(lrm_state)) { + crm_err("Could not initiate %s action for resource %s locally: %s " + CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); + fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, + PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); + process_lrm_event(lrm_state, op, NULL, 
+                              NULL);
+        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+
+    } else {
+        crm_err("Could not initiate %s action for resource %s remotely on %s: "
+                "%s " CRM_XS " rc=%d",
+                operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
+        fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
+                       PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
+        process_lrm_event(lrm_state, op, NULL, NULL);
+    }
+
+    free(op_id);
+    lrmd_free_event(op);
+}
+
+void
+do_lrm_event(long long action,
+             enum crmd_fsa_cause cause,
+             enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
+{
+    CRM_CHECK(FALSE, return);
+}
+
+static char *
+unescape_newlines(const char *string)
+{
+    char *pch = NULL;
+    char *ret = NULL;
+    static const char *escaped_newline = "\\n";
+
+    if (!string) {
+        return NULL;
+    }
+
+    ret = strdup(string);
+    pch = strstr(ret, escaped_newline);
+    while (pch != NULL) {
+        /* Replace newline escape pattern with actual newline (and a space so
+         * we don't have to shuffle the rest of the buffer)
+         */
+        pch[0] = '\n';
+        pch[1] = ' ';
+        pch = strstr(pch, escaped_newline);
+    }
+
+    return ret;
+}
+
+static bool
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
+                    const char * op_type, guint interval_ms)
+{
+    rsc_history_t *entry = NULL;
+
+    CRM_CHECK(lrm_state != NULL, return FALSE);
+    CRM_CHECK(rsc_id != NULL, return FALSE);
+    CRM_CHECK(op_type != NULL, return FALSE);
+
+    entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+    if (entry == NULL || entry->failed == NULL) {
+        return FALSE;
+    }
+
+    if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
+        && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
+        && entry->failed->interval_ms == interval_ms) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an executor action (actual or synthesized)
+ *
+ * \param[in] op         Executor action to log result for
+ * \param[in] op_key     Operation key for action
+ * \param[in] node_name  Name of node action was performed on, if known
+ * \param[in] confirmed  Whether to log that graph action was confirmed
+ */
+static void
+log_executor_event(const lrmd_event_data_t *op, const char *op_key,
+                   const char *node_name, gboolean confirmed)
+{
+    int log_level = LOG_ERR;
+    GString *str = g_string_sized_new(100); // reasonable starting size
+
+    pcmk__g_strcat(str,
+                   "Result of ", crm_action_str(op->op_type, op->interval_ms),
+                   " operation for ", op->rsc_id, NULL);
+
+    if (node_name != NULL) {
+        pcmk__g_strcat(str, " on ", node_name, NULL);
+    }
+
+    switch (op->op_status) {
+        case PCMK_EXEC_DONE:
+            log_level = LOG_NOTICE;
+            pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
+            break;
+
+        case PCMK_EXEC_TIMEOUT:
+            pcmk__g_strcat(str,
+                           ": ", pcmk_exec_status_str(op->op_status), " after ",
+                           pcmk__readable_interval(op->timeout), NULL);
+            break;
+
+        case PCMK_EXEC_CANCELLED:
+            log_level = LOG_INFO;
+            /* The order of the __attribute__ and the "Fall through" comment is
+             * IMPORTANT! Do not change it without proper testing with both
+             * clang and gcc, in multiple versions of each.
+             * The __clang__ check allows building with all versions of clang.
+             * The __has_c_attribute check works around a bug in the clang
+             * version shipped with RHEL 7, whose __has_attribute would happily
+             * claim support ("YES SIR WE GOT IT") and then fail the build on
+             * the next line.
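+             * Net effect: the attribute is emitted only when building with
+             * clang and both __has_c_attribute and
+             * __has_attribute(fallthrough) are available; other compilers
+             * rely solely on the "Fall through" comment below.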
+ */ +#ifdef __clang__ +#ifdef __has_c_attribute +#if __has_attribute(fallthrough) + __attribute__((fallthrough)); +#endif +#endif +#endif + // Fall through + default: + pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), + NULL); + } + + if ((op->exit_reason != NULL) + && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { + + pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); + } + + g_string_append(str, " " CRM_XS); + g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", + (confirmed? "" : "un"), op->call_id, op_key); + if (op->op_status == PCMK_EXEC_DONE) { + g_string_append_printf(str, " rc=%d", op->rc); + } + + do_crm_log(log_level, "%s", str->str); + g_string_free(str, TRUE); + + /* The services library has already logged the output at info or debug + * level, so just raise to notice if it looks like a failure. + */ + if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { + char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", + op->rsc_id, op->op_type, + op->interval_ms, node_name); + + crm_log_output(LOG_NOTICE, prefix, op->output); + free(prefix); + } +} + +void +process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + active_op_t *pending, const xmlNode *action_xml) +{ + char *op_id = NULL; + char *op_key = NULL; + + gboolean remove = FALSE; + gboolean removed = FALSE; + bool need_direct_ack = FALSE; + lrmd_rsc_info_t *rsc = NULL; + const char *node_name = NULL; + + CRM_CHECK(op != NULL, return); + CRM_CHECK(op->rsc_id != NULL, return); + + // Remap new status codes for older DCs + if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { + switch (op->op_status) { + case PCMK_EXEC_NOT_CONNECTED: + lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, + PCMK_EXEC_ERROR, op->exit_reason); + break; + case PCMK_EXEC_INVALID: + lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, + op->exit_reason); + break; + default: + break; + } + } + + op_id = make_stop_id(op->rsc_id, op->call_id); + op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); + + // Get resource info if available (from executor state or action XML) + if (lrm_state) { + rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); + } + if ((rsc == NULL) && action_xml) { + xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); + + const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(xml, XML_ATTR_TYPE); + + if (standard && type) { + crm_info("%s agent information not cached, using %s%s%s:%s from action XML", + op->rsc_id, standard, + (provider? ":" : ""), (provider? 
provider : ""), type); + rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); + } else { + crm_err("Can't process %s result because %s agent information not cached or in XML", + op_key, op->rsc_id); + } + } + + // Get node name if available (from executor state or action XML) + if (lrm_state) { + node_name = lrm_state->node_name; + } else if (action_xml) { + node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); + } + + if(pending == NULL) { + remove = TRUE; + if (lrm_state) { + pending = g_hash_table_lookup(lrm_state->active_ops, op_id); + } + } + + if (op->op_status == PCMK_EXEC_ERROR) { + switch(op->rc) { + case PCMK_OCF_NOT_RUNNING: + case PCMK_OCF_RUNNING_PROMOTED: + case PCMK_OCF_DEGRADED: + case PCMK_OCF_DEGRADED_PROMOTED: + // Leave it to the TE/scheduler to decide if this is an error + op->op_status = PCMK_EXEC_DONE; + break; + default: + /* Nothing to do */ + break; + } + } + + if (op->op_status != PCMK_EXEC_CANCELLED) { + /* We might not record the result, so directly acknowledge it to the + * originator instead, so it doesn't time out waiting for the result + * (especially important if part of a transition). + */ + need_direct_ack = TRUE; + + if (controld_action_is_recordable(op->op_type)) { + if (node_name && rsc) { + // We should record the result, and happily, we can + time_t lock_time = (pending == NULL)? 0 : pending->lock_time; + + controld_update_resource_history(node_name, rsc, op, lock_time); + need_direct_ack = FALSE; + + } else if (op->rsc_deleted) { + /* We shouldn't record the result (likely the resource was + * refreshed, cleaned, or removed while this operation was + * in flight). + */ + crm_notice("Not recording %s result in CIB because " + "resource information was removed since it was initiated", + op_key); + } else { + /* This shouldn't be possible; the executor didn't consider the + * resource deleted, but we couldn't find resource or node + * information. + */ + crm_err("Unable to record %s result in CIB: %s", op_key, + (node_name? "No resource information" : "No node name")); + } + } + + } else if (op->interval_ms == 0) { + /* A non-recurring operation was cancelled. Most likely, the + * never-initiated action was removed from the executor's pending + * operations list upon resource removal. + */ + need_direct_ack = TRUE; + + } else if (pending == NULL) { + /* This recurring operation was cancelled, but was not pending. No + * transition actions are waiting on it, nothing needs to be done. + */ + + } else if (op->user_data == NULL) { + /* This recurring operation was cancelled and pending, but we don't + * have a transition key. This should never happen. + */ + crm_err("Recurring operation %s was cancelled without transition information", + op_key); + + } else if (pcmk_is_set(pending->flags, active_op_remove)) { + /* This recurring operation was cancelled (by us) and pending, and we + * have been waiting for it to finish. + */ + if (lrm_state) { + controld_delete_action_history(op); + } + + /* Directly acknowledge failed recurring actions here. The above call to + * controld_delete_action_history() will not erase any corresponding + * last_failure entry, which means that the DC won't confirm the + * cancellation via process_op_deletion(), and the transition would + * otherwise wait for the action timer to pop. 
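+     * The did_lrm_rsc_op_fail() check below therefore triggers a direct ack
+     * only when the resource's cached last_failure matches this operation's
+     * resource ID, action, and interval.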
+ */ + if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, + pending->op_type, pending->interval_ms)) { + need_direct_ack = TRUE; + } + + } else if (op->rsc_deleted) { + /* This recurring operation was cancelled (but not by us, and the + * executor does not have resource information, likely due to resource + * cleanup, refresh, or removal) and pending. + */ + crm_debug("Recurring op %s was cancelled due to resource deletion", + op_key); + need_direct_ack = TRUE; + + } else { + /* This recurring operation was cancelled (but not by us, likely by the + * executor before stopping the resource) and pending. We don't need to + * do anything special. + */ + } + + if (need_direct_ack) { + controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); + } + + if(remove == FALSE) { + /* The caller will do this afterwards, but keep the logging consistent */ + removed = TRUE; + + } else if (lrm_state && ((op->interval_ms == 0) + || (op->op_status == PCMK_EXEC_CANCELLED))) { + + gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); + + if (op->interval_ms != 0) { + removed = TRUE; + } else if (found) { + removed = TRUE; + crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", + op_key, op->call_id, op_id, + g_hash_table_size(lrm_state->active_ops)); + } + } + + log_executor_event(op, op_key, node_name, removed); + + if (lrm_state) { + if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) { + crmd_alert_resource_op(lrm_state->node_name, op); + } else if (rsc && (op->rc == PCMK_OCF_OK)) { + char *metadata = unescape_newlines(op->output); + + controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); + free(metadata); + } + } + + if (op->rsc_deleted) { + crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); + if (lrm_state) { + delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, + true); + } + } + + /* If a shutdown was escalated while operations were pending, + * then the FSA will be stalled right now... allow it to continue + */ + controld_trigger_fsa(); + if (lrm_state && rsc) { + update_history_cache(lrm_state, rsc, op); + } + + lrmd_free_rsc_info(rsc); + free(op_key); + free(op_id); +} diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c new file mode 100644 index 0000000..8c68bfc --- /dev/null +++ b/daemons/controld/controld_execd_state.c @@ -0,0 +1,814 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/iso8601.h>
+#include <crm/pengine/rules.h>
+#include <crm/pengine/rules_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-controld.h>
+
+static GHashTable *lrm_state_table = NULL;
+extern GHashTable *proxy_table;
+int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
+void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
+
+static void
+free_rsc_info(gpointer value)
+{
+    lrmd_rsc_info_t *rsc_info = value;
+
+    lrmd_free_rsc_info(rsc_info);
+}
+
+static void
+free_deletion_op(gpointer value)
+{
+    struct pending_deletion_op_s *op = value;
+
+    free(op->rsc);
+    delete_ha_msg_input(op->input);
+    free(op);
+}
+
+static void
+free_recurring_op(gpointer value)
+{
+    active_op_t *op = value;
+
+    free(op->user_data);
+    free(op->rsc_id);
+    free(op->op_type);
+    free(op->op_key);
+    if (op->params) {
+        g_hash_table_destroy(op->params);
+    }
+    free(op);
+}
+
+static gboolean
+fail_pending_op(gpointer key, gpointer value, gpointer user_data)
+{
+    lrmd_event_data_t event = { 0, };
+    lrm_state_t *lrm_state = user_data;
+    active_op_t *op = value;
+
+    crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)",
+              op->rsc_id, op->op_type, op->interval_ms,
+              lrm_state->node_name, (char*)key, op->user_data);
+
+    event.type = lrmd_event_exec_complete;
+    event.rsc_id = op->rsc_id;
+    event.op_type = op->op_type;
+    event.user_data = op->user_data;
+    event.timeout = 0;
+    event.interval_ms = op->interval_ms;
+    lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED,
+                     "Action was pending when executor connection was dropped");
+    event.t_run = (unsigned int) op->start_time;
+    event.t_rcchange = (unsigned int) op->start_time;
+
+    event.call_id = op->call_id;
+    event.remote_nodename = lrm_state->node_name;
+    event.params = op->params;
+
+    process_lrm_event(lrm_state, &event, op, NULL);
+    lrmd__reset_result(&event);
+    return TRUE;
+}
+
+gboolean
+lrm_state_is_local(lrm_state_t *lrm_state)
+{
+    return (lrm_state != NULL)
+           && pcmk__str_eq(lrm_state->node_name, controld_globals.our_nodename,
+                           pcmk__str_casei);
+}
+
+/*!
+ * \internal
+ * \brief Create executor state entry for a node and add it to the state table
+ *
+ * \param[in] node_name  Node to create entry for
+ *
+ * \return Newly allocated executor state object initialized for \p node_name
+ */
+static lrm_state_t *
+lrm_state_create(const char *node_name)
+{
+    lrm_state_t *state = NULL;
+
+    if (!node_name) {
+        crm_err("No node name given for lrm state object");
+        return NULL;
+    }
+
+    state = calloc(1, sizeof(lrm_state_t));
+    if (!state) {
+        return NULL;
+    }
+
+    state->node_name = strdup(node_name);
+    state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
+    state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
+    state->active_ops = pcmk__strkey_table(free, free_recurring_op);
+    state->resource_history = pcmk__strkey_table(NULL, history_free);
+    state->metadata_cache = metadata_cache_new();
+
+    g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
+    return state;
+}
+
+void
+lrm_state_destroy(const char *node_name)
+{
+    g_hash_table_remove(lrm_state_table, node_name);
+}
+
+static gboolean
+remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
+{
+    remote_proxy_t *proxy = value;
+    const char *node_name = user_data;
+
+    if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+static remote_proxy_t *
+find_connected_proxy_by_node(const char * node_name)
+{
+    GHashTableIter gIter;
+    remote_proxy_t *proxy = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return NULL);
+
+    g_hash_table_iter_init(&gIter, proxy_table);
+
+    while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
+        if (proxy->source
+            && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) {
+            return proxy;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+remote_proxy_disconnect_by_node(const char * node_name)
+{
+    remote_proxy_t *proxy = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return);
+
+    while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
+        /* mainloop_del_ipc_client() eventually calls
+         * remote_proxy_disconnected(), which removes the entry from
+         * proxy_table. Do not do this in a g_hash_table_iter_next() loop.
+         */
+        if (proxy->source) {
+            mainloop_del_ipc_client(proxy->source);
+        }
+    }
+
+    return;
+}
+
+static void
+internal_lrm_state_destroy(gpointer data)
+{
+    lrm_state_t *lrm_state = data;
+
+    if (!lrm_state) {
+        return;
+    }
+
+    /* Rather than directly removing the recorded proxy entries from
+     * proxy_table, make sure any connected proxies get disconnected, so that
+     * remote_proxy_disconnected() is called and removes the entries from
+     * proxy_table.
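+     *
+     * The teardown order below follows from that: disconnect proxies first
+     * (while the tables they reference still exist), then clean up remote-RA
+     * state, drop the executor connection, and finally destroy each cache.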
+ */ + remote_proxy_disconnect_by_node(lrm_state->node_name); + + crm_trace("Destroying proxy table %s with %u members", + lrm_state->node_name, g_hash_table_size(proxy_table)); + // Just in case there's still any leftovers in proxy_table + g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); + remote_ra_cleanup(lrm_state); + lrmd_api_delete(lrm_state->conn); + + if (lrm_state->rsc_info_cache) { + crm_trace("Destroying rsc info cache with %u members", + g_hash_table_size(lrm_state->rsc_info_cache)); + g_hash_table_destroy(lrm_state->rsc_info_cache); + } + if (lrm_state->resource_history) { + crm_trace("Destroying history op cache with %u members", + g_hash_table_size(lrm_state->resource_history)); + g_hash_table_destroy(lrm_state->resource_history); + } + if (lrm_state->deletion_ops) { + crm_trace("Destroying deletion op cache with %u members", + g_hash_table_size(lrm_state->deletion_ops)); + g_hash_table_destroy(lrm_state->deletion_ops); + } + if (lrm_state->active_ops != NULL) { + crm_trace("Destroying pending op cache with %u members", + g_hash_table_size(lrm_state->active_ops)); + g_hash_table_destroy(lrm_state->active_ops); + } + metadata_cache_free(lrm_state->metadata_cache); + + free((char *)lrm_state->node_name); + free(lrm_state); +} + +void +lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) +{ + if (lrm_state->resource_history) { + crm_trace("Resetting resource history cache with %u members", + g_hash_table_size(lrm_state->resource_history)); + g_hash_table_remove_all(lrm_state->resource_history); + } + if (lrm_state->deletion_ops) { + crm_trace("Resetting deletion operations cache with %u members", + g_hash_table_size(lrm_state->deletion_ops)); + g_hash_table_remove_all(lrm_state->deletion_ops); + } + if (lrm_state->active_ops != NULL) { + crm_trace("Resetting active operations cache with %u members", + g_hash_table_size(lrm_state->active_ops)); + g_hash_table_remove_all(lrm_state->active_ops); + } + if (lrm_state->rsc_info_cache) { + crm_trace("Resetting resource information cache with %u members", + g_hash_table_size(lrm_state->rsc_info_cache)); + g_hash_table_remove_all(lrm_state->rsc_info_cache); + } + if (reset_metadata) { + metadata_cache_reset(lrm_state->metadata_cache); + } +} + +gboolean +lrm_state_init_local(void) +{ + if (lrm_state_table) { + return TRUE; + } + + lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy); + if (!lrm_state_table) { + return FALSE; + } + + proxy_table = pcmk__strikey_table(NULL, remote_proxy_free); + if (!proxy_table) { + g_hash_table_destroy(lrm_state_table); + lrm_state_table = NULL; + return FALSE; + } + + return TRUE; +} + +void +lrm_state_destroy_all(void) +{ + if (lrm_state_table) { + crm_trace("Destroying state table with %u members", + g_hash_table_size(lrm_state_table)); + g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; + } + if(proxy_table) { + crm_trace("Destroying proxy table with %u members", + g_hash_table_size(proxy_table)); + g_hash_table_destroy(proxy_table); proxy_table = NULL; + } +} + +lrm_state_t * +lrm_state_find(const char *node_name) +{ + if (!node_name) { + return NULL; + } + return g_hash_table_lookup(lrm_state_table, node_name); +} + +lrm_state_t * +lrm_state_find_or_create(const char *node_name) +{ + lrm_state_t *lrm_state; + + lrm_state = g_hash_table_lookup(lrm_state_table, node_name); + if (!lrm_state) { + lrm_state = lrm_state_create(node_name); + } + + return lrm_state; +} + +GList * 
+lrm_state_get_list(void) +{ + return g_hash_table_get_values(lrm_state_table); +} + +void +lrm_state_disconnect_only(lrm_state_t * lrm_state) +{ + int removed = 0; + + if (!lrm_state->conn) { + return; + } + crm_trace("Disconnecting %s", lrm_state->node_name); + + remote_proxy_disconnect_by_node(lrm_state->node_name); + + ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + removed = g_hash_table_foreach_remove(lrm_state->active_ops, + fail_pending_op, lrm_state); + crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); + } +} + +void +lrm_state_disconnect(lrm_state_t * lrm_state) +{ + if (!lrm_state->conn) { + return; + } + + lrm_state_disconnect_only(lrm_state); + + lrmd_api_delete(lrm_state->conn); + lrm_state->conn = NULL; +} + +int +lrm_state_is_connected(lrm_state_t * lrm_state) +{ + if (!lrm_state->conn) { + return FALSE; + } + return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); +} + +int +lrm_state_poke_connection(lrm_state_t * lrm_state) +{ + + if (!lrm_state->conn) { + return -ENOTCONN; + } + return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); +} + +// \return Standard Pacemaker return code +int +controld_connect_local_executor(lrm_state_t *lrm_state) +{ + int rc = pcmk_rc_ok; + + if (lrm_state->conn == NULL) { + lrmd_t *api = NULL; + + rc = lrmd__new(&api, NULL, NULL, 0); + if (rc != pcmk_rc_ok) { + return rc; + } + api->cmds->set_callback(api, lrm_op_callback); + lrm_state->conn = api; + } + + rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, + CRM_SYSTEM_CRMD, NULL); + rc = pcmk_legacy2rc(rc); + + if (rc == pcmk_rc_ok) { + lrm_state->num_lrm_register_fails = 0; + } else { + lrm_state->num_lrm_register_fails++; + } + return rc; +} + +static remote_proxy_t * +crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) +{ + struct ipc_client_callbacks proxy_callbacks = { + .dispatch = remote_proxy_dispatch, + .destroy = remote_proxy_disconnected + }; + remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, + session_id, channel); + return proxy; +} + +gboolean +crmd_is_proxy_session(const char *session) +{ + return g_hash_table_lookup(proxy_table, session) ? 
TRUE : FALSE; +} + +void +crmd_proxy_send(const char *session, xmlNode *msg) +{ + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + lrm_state_t *lrm_state = NULL; + + if (!proxy) { + return; + } + crm_log_xml_trace(msg, "to-proxy"); + lrm_state = lrm_state_find(proxy->node_name); + if (lrm_state) { + crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); + remote_proxy_relay_event(proxy, msg); + } +} + +static void +crmd_proxy_dispatch(const char *session, xmlNode *msg) +{ + crm_trace("Processing proxied IPC message from session %s", session); + crm_log_xml_trace(msg, "controller[inbound]"); + crm_xml_add(msg, F_CRM_SYS_FROM, session); + if (controld_authorize_ipc_message(msg, NULL, session)) { + route_message(C_IPC_MESSAGE, msg); + } + controld_trigger_fsa(); +} + +static void +remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + if (rc != pcmk_ok) { + crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); + + if (rc == -EACCES || rc == -pcmk_err_schema_validation) { + crm_err("The cluster is mis-configured - shutting down and staying down"); + } + + } else { + lrmd_t * lrmd = (lrmd_t *)user_data; + crm_time_t *now = crm_time_new(NULL); + GHashTable *config_hash = pcmk__strkey_table(free, free); + + crm_debug("Call %d : Parsing CIB options", call_id); + + pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL, + config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); + + /* Now send it to the remote peer */ + lrmd__validate_remote_settings(lrmd, config_hash); + + g_hash_table_destroy(config_hash); + crm_time_free(now); + } +} + +static void +crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) +{ + lrm_state_t *lrm_state = userdata; + const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); + remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); + + const char *op = crm_element_value(msg, F_LRMD_IPC_OP); + if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) { + const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); + + proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); + if (!remote_ra_controlling_guest(lrm_state)) { + if (proxy != NULL) { + cib_t *cib_conn = controld_globals.cib_conn; + + /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ + int rc = cib_conn->cmds->query(cib_conn, XML_CIB_TAG_CRMCONFIG, + NULL, cib_scope_local); + cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE, + lrmd, + "remote_config_check", + remote_config_check, + NULL); + } + } else { + crm_debug("Skipping remote_config_check for guest-nodes"); + } + + } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) { + char *now_s = NULL; + + crm_notice("%s requested shutdown of its remote connection", + lrm_state->node_name); + + if (!remote_ra_is_in_maintenance(lrm_state)) { + now_s = pcmk__ttoa(time(NULL)); + update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE); + free(now_s); + + remote_proxy_ack_shutdown(lrmd); + + crm_warn("Reconnection attempts to %s may result in failures that must be cleared", + lrm_state->node_name); + } else { + remote_proxy_nack_shutdown(lrmd); + + crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", + lrm_state->node_name); + } + return; + + } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) { + /* This is for the controller, which we are, so don't try + * to send to 
ourselves over IPC -- do it directly. + */ + int flags = 0; + xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); + + CRM_CHECK(request != NULL, return); + CRM_CHECK(lrm_state->node_name, return); + crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); + pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name); + + /* Pacemaker Remote nodes don't know their own names (as known to the + * cluster). When getting a node info request with no name or ID, add + * the name, so we don't return info for ourselves instead of the + * Pacemaker Remote node. + */ + if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_casei)) { + int node_id = 0; + + crm_element_value_int(request, XML_ATTR_ID, &node_id); + if ((node_id <= 0) + && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) { + crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name); + } + } + + crmd_proxy_dispatch(session, request); + + crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); + if (flags & crm_ipc_client_response) { + int msg_id = 0; + xmlNode *op_reply = create_xml_node(NULL, "ack"); + + crm_xml_add(op_reply, "function", __func__); + crm_xml_add_int(op_reply, "line", __LINE__); + + crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); + remote_proxy_relay_response(proxy, op_reply, msg_id); + + free_xml(op_reply); + } + + } else { + remote_proxy_cb(lrmd, lrm_state->node_name, msg); + } +} + + +// \return Standard Pacemaker return code +int +controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, + int port, int timeout_ms) +{ + int rc = pcmk_rc_ok; + + if (lrm_state->conn == NULL) { + lrmd_t *api = NULL; + + rc = lrmd__new(&api, lrm_state->node_name, server, port); + if (rc != pcmk_rc_ok) { + crm_warn("Pacemaker Remote connection to %s:%s failed: %s " + CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc); + + return rc; + } + lrm_state->conn = api; + api->cmds->set_callback(api, remote_lrm_op_callback); + lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb); + } + + crm_trace("Initiating remote connection to %s:%d with timeout %dms", + server, port, timeout_ms); + rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, + lrm_state->node_name, + timeout_ms); + if (rc == pcmk_ok) { + lrm_state->num_lrm_register_fails = 0; + } else { + lrm_state->num_lrm_register_fails++; // Ignored for remote connections + } + return pcmk_legacy2rc(rc); +} + +int +lrm_state_get_metadata(lrm_state_t * lrm_state, + const char *class, + const char *provider, + const char *agent, char **output, enum lrmd_call_options options) +{ + lrmd_key_value_t *params = NULL; + + if (!lrm_state->conn) { + return -ENOTCONN; + } + + /* Add the node name to the environment, as is done with normal resource + * action calls. Meta-data calls shouldn't need it, but some agents are + * written with an ocf_local_nodename call at the beginning regardless of + * action. Without the environment variable, the agent would try to contact + * the controller to get the node name -- but the controller would be + * blocking on the synchronous meta-data call. + * + * At this point, we have to assume that agents are unlikely to make other + * calls that require the controller, such as crm_node --quorum or + * --cluster-id. + * + * @TODO Make meta-data calls asynchronous. (This will be part of a larger + * project to make meta-data calls via the executor rather than directly.) 
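+     *
+     * The key added below expands to "CRM_meta_on_node". For an OCF agent,
+     * it presumably shows up in the action environment as
+     * OCF_RESKEY_CRM_meta_on_node=<node name>, the same way other meta
+     * attributes are passed.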
+ */ + params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, + lrm_state->node_name); + + return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, + class, provider, agent, output, options, params); +} + +int +lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, + guint interval_ms) +{ + if (!lrm_state->conn) { + return -ENOTCONN; + } + + /* Figure out a way to make this async? + * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); + } + return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, + action, interval_ms); +} + +lrmd_rsc_info_t * +lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) +{ + lrmd_rsc_info_t *rsc = NULL; + + if (!lrm_state->conn) { + return NULL; + } + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + return remote_ra_get_rsc_info(lrm_state, rsc_id); + } + + rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); + if (rsc == NULL) { + /* only contact the lrmd if we don't already have a cached rsc info */ + rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); + if (rsc == NULL) { + return NULL; + } + /* cache the result */ + g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); + } + + return lrmd_copy_rsc_info(rsc); + +} + +/*! + * \internal + * \brief Initiate a resource agent action + * + * \param[in,out] lrm_state Executor state object + * \param[in] rsc_id ID of resource for action + * \param[in] action Action to execute + * \param[in] userdata String to copy and pass to execution callback + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in] timeout_ms Action timeout (in milliseconds) + * \param[in] start_delay_ms Delay (in ms) before initiating action + * \param[in] parameters Hash table of resource parameters + * \param[out] call_id Where to store call ID on success + * + * \return Standard Pacemaker return code + */ +int +controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, GHashTable *parameters, + int *call_id) +{ + int rc = pcmk_rc_ok; + lrmd_key_value_t *params = NULL; + + if (lrm_state->conn == NULL) { + return ENOTCONN; + } + + // Convert parameters from hash table to list + if (parameters != NULL) { + const char *key = NULL; + const char *value = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, parameters); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + params = lrmd_key_value_add(params, key, value); + } + } + + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + rc = controld_execute_remote_agent(lrm_state, rsc_id, action, + userdata, interval_ms, timeout_ms, + start_delay_ms, params, call_id); + + } else { + rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, + action, userdata, + interval_ms, timeout_ms, + start_delay_ms, + lrmd_opt_notify_changes_only, + params); + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + } else { + *call_id = rc; + rc = pcmk_rc_ok; + } + } + return rc; +} + +int +lrm_state_register_rsc(lrm_state_t * lrm_state, + const char *rsc_id, + const char *class, + const char *provider, const char *agent, enum lrmd_call_options options) +{ + lrmd_t *conn = (lrmd_t *) lrm_state->conn; + + if (conn == NULL) { + 
+        return -ENOTCONN;
+    }
+
+    if (is_remote_lrmd_ra(agent, provider, NULL)) {
+        return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
+    }
+
+    /* @TODO Implement an asynchronous version of this (currently a blocking
+     * call to the lrmd).
+     */
+    return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
+                                    agent, options);
+}
+
+int
+lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+                         const char *rsc_id, enum lrmd_call_options options)
+{
+    if (!lrm_state->conn) {
+        return -ENOTCONN;
+    }
+
+    if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
+        lrm_state_destroy(rsc_id);
+        return pcmk_ok;
+    }
+
+    g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
+
+    /* @TODO Optimize this ... this function is a blocking round trip from
+     * client to daemon. The controld_execd_state.c code path that uses this
+     * function should always treat it as an async operation. The executor API
+     * should make an async version available.
+     */
+    return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
+}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
new file mode 100644
index 0000000..89cb61f
--- /dev/null
+++ b/daemons/controld/controld_fencing.c
@@ -0,0 +1,1108 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+
+#include <pacemaker-controld.h>
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
+
+/*
+ * stonith failure counting
+ *
+ * We don't want to get stuck in a permanent fencing loop. Keep track of the
+ * number of fencing failures for each target node, and the maximum number of
+ * failures we will tolerate before giving up on restarting a transition.
+ */
+
+struct st_fail_rec {
+    int count;
+};
+
+static bool fence_reaction_panic = false;
+static unsigned long int stonith_max_attempts = 10;
+static GHashTable *stonith_failures = NULL;
+
+/*!
+ * \internal
+ * \brief Update max fencing attempts before giving up
+ *
+ * \param[in] value  New max fencing attempts
+ */
+static void
+update_stonith_max_attempts(const char *value)
+{
+    stonith_max_attempts = char2score(value);
+    if (stonith_max_attempts < 1UL) {
+        stonith_max_attempts = 10UL;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Configure reaction to notification of local node being fenced
+ *
+ * \param[in] reaction_s  Reaction type
+ */
+static void
+set_fence_reaction(const char *reaction_s)
+{
+    if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
+        fence_reaction_panic = true;
+
+    } else {
+        if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
+            crm_warn("Invalid value '%s' for %s, using 'stop'",
+                     reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
+        }
+        fence_reaction_panic = false;
+    }
+}
+
+/*!
+ * \internal + * \brief Configure fencing options based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_fencing(GHashTable *options) +{ + const char *value = NULL; + + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION); + set_fence_reaction(value); + + value = g_hash_table_lookup(options, "stonith-max-attempts"); + update_stonith_max_attempts(value); +} + +static gboolean +too_many_st_failures(const char *target) +{ + GHashTableIter iter; + const char *key = NULL; + struct st_fail_rec *value = NULL; + + if (stonith_failures == NULL) { + return FALSE; + } + + if (target == NULL) { + g_hash_table_iter_init(&iter, stonith_failures); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + + if (value->count >= stonith_max_attempts) { + target = (const char*)key; + goto too_many; + } + } + } else { + value = g_hash_table_lookup(stonith_failures, target); + if ((value != NULL) && (value->count >= stonith_max_attempts)) { + goto too_many; + } + } + return FALSE; + +too_many: + crm_warn("Too many failures (%d) to fence %s, giving up", + value->count, target); + return TRUE; +} + +/*! + * \internal + * \brief Reset a stonith fail count + * + * \param[in] target Name of node to reset, or NULL for all + */ +void +st_fail_count_reset(const char *target) +{ + if (stonith_failures == NULL) { + return; + } + + if (target) { + struct st_fail_rec *rec = NULL; + + rec = g_hash_table_lookup(stonith_failures, target); + if (rec) { + rec->count = 0; + } + } else { + GHashTableIter iter; + const char *key = NULL; + struct st_fail_rec *rec = NULL; + + g_hash_table_iter_init(&iter, stonith_failures); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &rec)) { + rec->count = 0; + } + } +} + +static void +st_fail_count_increment(const char *target) +{ + struct st_fail_rec *rec = NULL; + + if (stonith_failures == NULL) { + stonith_failures = pcmk__strkey_table(free, free); + } + + rec = g_hash_table_lookup(stonith_failures, target); + if (rec) { + rec->count++; + } else { + rec = malloc(sizeof(struct st_fail_rec)); + if(rec == NULL) { + return; + } + + rec->count = 1; + g_hash_table_insert(stonith_failures, strdup(target), rec); + } +} + +/* end stonith fail count functions */ + + +static void +cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + if (rc < pcmk_ok) { + crm_err("Fencing update %d for %s: failed - %s (%d)", + call_id, (char *)user_data, pcmk_strerror(rc), rc); + crm_log_xml_warn(msg, "Failed update"); + abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed", + NULL); + + } else { + crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); + } +} + +static void +send_stonith_update(pcmk__graph_action_t *action, const char *target, + const char *uuid) +{ + int rc = pcmk_ok; + crm_node_t *peer = NULL; + + /* We (usually) rely on the membership layer to do node_update_cluster, + * and the peer status callback to do node_update_peer, because the node + * might have already rejoined before we get the stonith result here. 
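+     * Hence only node_update_join|node_update_expected are forced below;
+     * node_update_cluster is added only when the peer has never been seen
+     * (peer->state == NULL), so that such a node is not left looking unclean.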
+ */ + int flags = node_update_join | node_update_expected; + + /* zero out the node-status & remove all LRM status info */ + xmlNode *node_state = NULL; + + CRM_CHECK(target != NULL, return); + CRM_CHECK(uuid != NULL, return); + + /* Make sure the membership and join caches are accurate */ + peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); + + CRM_CHECK(peer != NULL, return); + + if (peer->state == NULL) { + /* Usually, we rely on the membership layer to update the cluster state + * in the CIB. However, if the node has never been seen, do it here, so + * the node is not considered unclean. + */ + flags |= node_update_cluster; + } + + if (peer->uuid == NULL) { + crm_info("Recording uuid '%s' for node '%s'", uuid, target); + peer->uuid = strdup(uuid); + } + + crmd_peer_down(peer, TRUE); + + /* Generate a node state update for the CIB */ + node_state = create_node_state_update(peer, flags, NULL, __func__); + + /* we have to mark whether or not remote nodes have already been fenced */ + if (peer->flags & crm_remote_node) { + char *now_s = pcmk__ttoa(time(NULL)); + + crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); + free(now_s); + } + + /* Force our known ID */ + crm_xml_add(node_state, XML_ATTR_ID, uuid); + + rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, + XML_CIB_TAG_STATUS, node_state, + cib_scope_local + |cib_can_create); + + /* Delay processing the trigger until the update completes */ + crm_debug("Sending fencing update %d for %s", rc, target); + fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated); + + // Make sure it sticks + /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, + * cib_scope_local); + */ + + controld_delete_node_state(peer->uname, controld_section_all, + cib_scope_local); + free_xml(node_state); + return; +} + +/*! + * \internal + * \brief Abort transition due to stonith failure + * + * \param[in] abort_action Whether to restart or stop transition + * \param[in] target Don't restart if this (NULL for any) has too many failures + * \param[in] reason Log this stonith action XML as abort reason (or NULL) + */ +static void +abort_for_stonith_failure(enum pcmk__graph_next abort_action, + const char *target, const xmlNode *reason) +{ + /* If stonith repeatedly fails, we eventually give up on starting a new + * transition for that reason. + */ + if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { + abort_action = pcmk__graph_wait; + } + abort_transition(INFINITY, abort_action, "Stonith failed", reason); +} + + +/* + * stonith cleanup list + * + * If the DC is shot, proper notifications might not go out. + * The stonith cleanup list allows the cluster to (re-)send + * notifications once a new DC is elected. + */ + +static GList *stonith_cleanup_list = NULL; + +/*! + * \internal + * \brief Add a node to the stonith cleanup list + * + * \param[in] target Name of node to add + */ +void +add_stonith_cleanup(const char *target) { + stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); +} + +/*! 
+ * \internal
+ * \brief Remove a node from the stonith cleanup list
+ *
+ * \param[in] target  Name of node to remove
+ */
+void
+remove_stonith_cleanup(const char *target)
+{
+    GList *iter = stonith_cleanup_list;
+
+    while (iter != NULL) {
+        GList *tmp = iter;
+        char *iter_name = tmp->data;
+
+        iter = iter->next;
+        if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
+            crm_trace("Removing %s from the cleanup list", iter_name);
+            stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
+            free(iter_name);
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Purge all entries from the stonith cleanup list
+ */
+void
+purge_stonith_cleanup(void)
+{
+    if (stonith_cleanup_list) {
+        GList *iter = NULL;
+
+        for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+            char *target = iter->data;
+
+            crm_info("Purging %s from stonith cleanup list", target);
+            free(target);
+        }
+        g_list_free(stonith_cleanup_list);
+        stonith_cleanup_list = NULL;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Send stonith updates for all entries in cleanup list, then purge it
+ */
+void
+execute_stonith_cleanup(void)
+{
+    GList *iter;
+
+    for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
+        char *target = iter->data;
+        crm_node_t *target_node = crm_get_peer(0, target);
+        const char *uuid = crm_peer_uuid(target_node);
+
+        crm_notice("Marking %s, target of a previous stonith action, as clean", target);
+        send_stonith_update(NULL, target, uuid);
+        free(target);
+    }
+    g_list_free(stonith_cleanup_list);
+    stonith_cleanup_list = NULL;
+}
+
+/* end stonith cleanup list functions */
+
+
+/* stonith API client
+ *
+ * Functions that need to interact directly with the fencer via its API
+ */
+
+static stonith_t *stonith_api = NULL;
+static crm_trigger_t *stonith_reconnect = NULL;
+static char *te_client_id = NULL;
+
+static gboolean
+fail_incompletable_stonith(pcmk__graph_t *graph)
+{
+    GList *lpc = NULL;
+    const char *task = NULL;
+    xmlNode *last_action = NULL;
+
+    if (graph == NULL) {
+        return FALSE;
+    }
+
+    for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+        GList *lpc2 = NULL;
+        pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+            continue;
+        }
+
+        for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
+            pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
+
+            if ((action->type != pcmk__cluster_graph_action)
+                || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+                continue;
+            }
+
+            task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+            if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
+                pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+                last_action = action->xml;
+                pcmk__update_graph(graph, action);
+                crm_notice("Failing action %d (%s): fencer terminated",
+                           action->id, ID(action->xml));
+            }
+        }
+    }
+
+    if (last_action != NULL) {
+        crm_warn("Fencer failure resulted in unrunnable actions");
+        abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+static void
+tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
+{
+    te_cleanup_stonith_history_sync(st, FALSE);
+
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
+        crm_crit("Fencing daemon connection failed");
+        mainloop_set_trigger(stonith_reconnect);
+
+    } else {
+        crm_info("Fencing daemon disconnected");
+    }
+
+    if (stonith_api) {
+        /* the client API won't properly reconnect notifications
+         * if they
are still in the table - so remove them + */ + if (stonith_api->state != stonith_disconnected) { + stonith_api->cmds->disconnect(st); + } + stonith_api->cmds->remove_notification(stonith_api, NULL); + } + + if (AM_I_DC) { + fail_incompletable_stonith(controld_globals.transition_graph); + trigger_graph(); + } +} + +/*! + * \internal + * \brief Handle an event notification from the fencing API + * + * \param[in] st Fencing API connection (ignored) + * \param[in] event Fencing API event notification + */ +static void +handle_fence_notification(stonith_t *st, stonith_event_t *event) +{ + bool succeeded = true; + const char *executioner = "the cluster"; + const char *client = "a client"; + const char *reason = NULL; + int exec_status; + + if (te_client_id == NULL) { + te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, + (unsigned long) getpid()); + } + + if (event == NULL) { + crm_err("Notify data not found"); + return; + } + + if (event->executioner != NULL) { + executioner = event->executioner; + } + if (event->client_origin != NULL) { + client = event->client_origin; + } + + exec_status = stonith__event_execution_status(event); + if ((stonith__event_exit_status(event) != CRM_EX_OK) + || (exec_status != PCMK_EXEC_DONE)) { + succeeded = false; + if (exec_status == PCMK_EXEC_DONE) { + exec_status = PCMK_EXEC_ERROR; + } + } + reason = stonith__event_exit_reason(event); + + crmd_alert_fencing_op(event); + + if (pcmk__str_eq("on", event->action, pcmk__str_none)) { + // Unfencing doesn't need special handling, just a log message + if (succeeded) { + crm_notice("%s was unfenced by %s at the request of %s@%s", + event->target, executioner, client, event->origin); + } else { + crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", + event->target, executioner, + pcmk_exec_status_str(exec_status), + ((reason == NULL)? "" : ": "), + ((reason == NULL)? "" : reason), + stonith__event_exit_status(event)); + } + return; + } + + if (succeeded + && pcmk__str_eq(event->target, controld_globals.our_nodename, + pcmk__str_casei)) { + /* We were notified of our own fencing. Most likely, either fencing was + * misconfigured, or fabric fencing that doesn't cut cluster + * communication is in use. + * + * Either way, shutting down the local host is a good idea, to require + * administrator intervention. Also, other nodes would otherwise likely + * set our status to lost because of the fencing callback and discard + * our subsequent election votes as "not part of our cluster". + */ + crm_crit("We were allegedly just fenced by %s for %s!", + executioner, event->origin); // Dumps blackbox if enabled + if (fence_reaction_panic) { + pcmk__panic(__func__); + } else { + crm_exit(CRM_EX_FATAL); + } + return; // Should never get here + } + + /* Update the count of fencing failures for this target, in case we become + * DC later. The current DC has already updated its fail count in + * tengine_stonith_callback(). + */ + if (!AM_I_DC) { + if (succeeded) { + st_fail_count_reset(event->target); + } else { + st_fail_count_increment(event->target); + } + } + + crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " + "%s%s%s%s " CRM_XS " event=%s", + event->target, (succeeded? "" : " not"), + event->action, executioner, client, event->origin, + (succeeded? "OK" : pcmk_exec_status_str(exec_status)), + ((reason == NULL)? "" : " ("), + ((reason == NULL)? "" : reason), + ((reason == NULL)? 
"" : ")"), + event->id); + + if (succeeded) { + crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, + CRM_GET_PEER_ANY); + const char *uuid = NULL; + + if (peer == NULL) { + return; + } + + uuid = crm_peer_uuid(peer); + + if (AM_I_DC) { + /* The DC always sends updates */ + send_stonith_update(NULL, event->target, uuid); + + /* @TODO Ideally, at this point, we'd check whether the fenced node + * hosted any guest nodes, and call remote_node_down() for them. + * Unfortunately, the controller doesn't have a simple, reliable way + * to map hosts to guests. It might be possible to track this in the + * peer cache via crm_remote_peer_cache_refresh(). For now, we rely + * on the scheduler creating fence pseudo-events for the guests. + */ + + if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { + /* Abort the current transition if it wasn't the cluster that + * initiated fencing. + */ + crm_info("External fencing operation from %s fenced %s", + client, event->target); + abort_transition(INFINITY, pcmk__graph_restart, + "External Fencing Operation", NULL); + } + + } else if (pcmk__str_eq(controld_globals.dc_name, event->target, + pcmk__str_null_matches|pcmk__str_casei) + && !pcmk_is_set(peer->flags, crm_remote_node)) { + // Assume the target was our DC if we don't currently have one + + if (controld_globals.dc_name != NULL) { + crm_notice("Fencing target %s was our DC", event->target); + } else { + crm_notice("Fencing target %s may have been our DC", + event->target); + } + + /* Given the CIB resyncing that occurs around elections, + * have one node update the CIB now and, if the new DC is different, + * have them do so too after the election + */ + if (pcmk__str_eq(event->executioner, controld_globals.our_nodename, + pcmk__str_casei)) { + send_stonith_update(NULL, event->target, uuid); + } + add_stonith_cleanup(event->target); + } + + /* If the target is a remote node, and we host its connection, + * immediately fail all monitors so it can be recovered quickly. + * The connection won't necessarily drop when a remote node is fenced, + * so the failure might not otherwise be detected until the next poke. + */ + if (pcmk_is_set(peer->flags, crm_remote_node)) { + remote_ra_fail(event->target); + } + + crmd_peer_down(peer, TRUE); + } +} + +/*! + * \brief Connect to fencer + * + * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop + * + * \return TRUE + * \note If user_data is NULL, this will wait 2s between attempts, for up to + * 30 attempts, meaning the controller could be blocked as long as 58s. 
+ */ +static gboolean +te_connect_stonith(gpointer user_data) +{ + int rc = pcmk_ok; + + if (stonith_api == NULL) { + stonith_api = stonith_api_new(); + if (stonith_api == NULL) { + crm_err("Could not connect to fencer: API memory allocation failed"); + return TRUE; + } + } + + if (stonith_api->state != stonith_disconnected) { + crm_trace("Already connected to fencer, no need to retry"); + return TRUE; + } + + if (user_data == NULL) { + // Blocking (retry failures now until successful) + rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); + if (rc != pcmk_ok) { + crm_err("Could not connect to fencer in 30 attempts: %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + } + } else { + // Non-blocking (retry failures later in main loop) + rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); + if (rc != pcmk_ok) { + if (pcmk_is_set(controld_globals.fsa_input_register, + R_ST_REQUIRED)) { + crm_notice("Fencer connection failed (will retry): %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + mainloop_set_trigger(stonith_reconnect); + } else { + crm_info("Fencer connection failed (ignoring because no longer required): %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + } + return TRUE; + } + } + + if (rc == pcmk_ok) { + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_DISCONNECT, + tengine_stonith_connection_destroy); + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_FENCE, + handle_fence_notification); + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_HISTORY_SYNCED, + tengine_stonith_history_synced); + te_trigger_stonith_history_sync(TRUE); + crm_notice("Fencer successfully connected"); + } + + return TRUE; +} + +/*! + \internal + \brief Schedule fencer connection attempt in main loop +*/ +void +controld_trigger_fencer_connect(void) +{ + if (stonith_reconnect == NULL) { + stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, + te_connect_stonith, + GINT_TO_POINTER(TRUE)); + } + controld_set_fsa_input_flags(R_ST_REQUIRED); + mainloop_set_trigger(stonith_reconnect); +} + +void +controld_disconnect_fencer(bool destroy) +{ + if (stonith_api) { + // Prevent fencer connection from coming up again + controld_clear_fsa_input_flags(R_ST_REQUIRED); + + if (stonith_api->state != stonith_disconnected) { + stonith_api->cmds->disconnect(stonith_api); + } + stonith_api->cmds->remove_notification(stonith_api, NULL); + } + if (destroy) { + if (stonith_api) { + stonith_api->cmds->free(stonith_api); + stonith_api = NULL; + } + if (stonith_reconnect) { + mainloop_destroy_trigger(stonith_reconnect); + stonith_reconnect = NULL; + } + if (te_client_id) { + free(te_client_id); + te_client_id = NULL; + } + } +} + +static gboolean +do_stonith_history_sync(gpointer user_data) +{ + if (stonith_api && (stonith_api->state != stonith_disconnected)) { + stonith_history_t *history = NULL; + + te_cleanup_stonith_history_sync(stonith_api, FALSE); + stonith_api->cmds->history(stonith_api, + st_opt_sync_call | st_opt_broadcast, + NULL, &history, 5); + stonith_history_free(history); + return TRUE; + } else { + crm_info("Skip triggering stonith history-sync as stonith is disconnected"); + return FALSE; + } +} + +static void +tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) +{ + char *uuid = NULL; + int stonith_id = -1; + int transition_id = -1; + pcmk__graph_action_t *action = NULL; + const char *target = NULL; + + if ((data == NULL) || (data->userdata == NULL)) { + crm_err("Ignoring fence operation %d 
result: " + "No transition key given (bug?)", + ((data == NULL)? -1 : data->call_id)); + return; + } + + if (!AM_I_DC) { + const char *reason = stonith__exit_reason(data); + + if (reason == NULL) { + reason = pcmk_exec_status_str(stonith__execution_status(data)); + } + crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", + data->call_id, stonith__exit_status(data), reason, + (const char *) data->userdata); + return; + } + + CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, + &stonith_id, NULL), + goto bail); + + if (controld_globals.transition_graph->complete || (stonith_id < 0) + || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) + || (controld_globals.transition_graph->id != transition_id)) { + crm_info("Ignoring fence operation %d result: " + "Not from current transition " CRM_XS + " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", + data->call_id, + pcmk__btoa(controld_globals.transition_graph->complete), + stonith_id, uuid, controld_globals.te_uuid, transition_id, + controld_globals.transition_graph->id); + goto bail; + } + + action = controld_get_action(stonith_id); + if (action == NULL) { + crm_err("Ignoring fence operation %d result: " + "Action %d not found in transition graph (bug?) " + CRM_XS " uuid=%s transition=%d", + data->call_id, stonith_id, uuid, transition_id); + goto bail; + } + + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + if (target == NULL) { + crm_err("Ignoring fence operation %d result: No target given (bug?)", + data->call_id); + goto bail; + } + + stop_te_timer(action); + if (stonith__exit_status(data) == CRM_EX_OK) { + const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + const char *op = crm_meta_value(action->params, "stonith_action"); + + crm_info("Fence operation %d for %s succeeded", data->call_id, target); + if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { + te_action_confirmed(action, NULL); + if (pcmk__str_eq("on", op, pcmk__str_casei)) { + const char *value = NULL; + char *now = pcmk__ttoa(time(NULL)); + gboolean is_remote_node = FALSE; + + /* This check is not 100% reliable, since this node is not + * guaranteed to have the remote node cached. However, it + * doesn't have to be reliable, since the attribute manager can + * learn a node's "remoteness" by other means sooner or later. + * This allows it to learn more quickly if this node does have + * the information. 
+ */ + if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { + is_remote_node = TRUE; + } + + update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, + is_remote_node); + free(now); + + value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); + update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, + is_remote_node); + + value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); + update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, + is_remote_node); + + } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { + send_stonith_update(action, target, uuid); + pcmk__set_graph_action_flags(action, + pcmk__graph_action_sent_update); + } + } + st_fail_count_reset(target); + + } else { + enum pcmk__graph_next abort_action = pcmk__graph_restart; + int status = stonith__execution_status(data); + const char *reason = stonith__exit_reason(data); + + if (reason == NULL) { + if (status == PCMK_EXEC_DONE) { + reason = "Agent returned error"; + } else { + reason = pcmk_exec_status_str(status); + } + } + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + + /* If no fence devices were available, there's no use in immediately + * checking again, so don't start a new transition in that case. + */ + if (status == PCMK_EXEC_NO_FENCE_DEVICE) { + crm_warn("Fence operation %d for %s failed: %s " + "(aborting transition and giving up for now)", + data->call_id, target, reason); + abort_action = pcmk__graph_wait; + } else { + crm_notice("Fence operation %d for %s failed: %s " + "(aborting transition)", data->call_id, target, reason); + } + + /* Increment the fail count now, so abort_for_stonith_failure() can + * check it. Non-DC nodes will increment it in + * handle_fence_notification(). + */ + st_fail_count_increment(target); + abort_for_stonith_failure(abort_action, target, NULL); + } + + pcmk__update_graph(controld_globals.transition_graph, action); + trigger_graph(); + + bail: + free(data->userdata); + free(uuid); + return; +} + +static int +fence_with_delay(const char *target, const char *type, int delay) +{ + uint32_t options = st_opt_none; // Group of enum stonith_call_options + int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout + / 1000); + + if (crmd_join_phase_count(crm_join_confirmed) == 1) { + stonith__set_call_options(options, target, st_opt_allow_suicide); + } + return stonith_api->cmds->fence_with_delay(stonith_api, options, target, + type, timeout_sec, 0, delay); +} + +/*! 
+ * \internal + * \brief Execute a fencing action from a transition graph + * + * \param[in] graph Transition graph being executed (ignored) + * \param[in] action Fencing action to execute + * + * \return Standard Pacemaker return code + */ +int +controld_execute_fence_action(pcmk__graph_t *graph, + pcmk__graph_action_t *action) +{ + int rc = 0; + const char *id = ID(action->xml); + const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *type = crm_meta_value(action->params, "stonith_action"); + char *transition_key = NULL; + const char *priority_delay = NULL; + int delay_i = 0; + gboolean invalid_action = FALSE; + int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout + / 1000); + + CRM_CHECK(id != NULL, invalid_action = TRUE); + CRM_CHECK(uuid != NULL, invalid_action = TRUE); + CRM_CHECK(type != NULL, invalid_action = TRUE); + CRM_CHECK(target != NULL, invalid_action = TRUE); + + if (invalid_action) { + crm_log_xml_warn(action->xml, "BadAction"); + return EPROTO; + } + + priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); + + crm_notice("Requesting fencing (%s) targeting node %s " + CRM_XS " action=%s timeout=%i%s%s", + type, target, id, stonith_timeout, + priority_delay ? " priority_delay=" : "", + priority_delay ? priority_delay : ""); + + /* Passing NULL means block until we can connect... */ + te_connect_stonith(NULL); + + pcmk__scan_min_int(priority_delay, &delay_i, 0); + rc = fence_with_delay(target, type, delay_i); + transition_key = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, 0, + controld_globals.te_uuid); + stonith_api->cmds->register_callback(stonith_api, rc, + (stonith_timeout + + (delay_i > 0 ? delay_i : 0)), + st_opt_timeout_updates, transition_key, + "tengine_stonith_callback", + tengine_stonith_callback); + return pcmk_rc_ok; +} + +bool +controld_verify_stonith_watchdog_timeout(const char *value) +{ + const char *our_nodename = controld_globals.our_nodename; + gboolean rv = TRUE; + + if (stonith_api && (stonith_api->state != stonith_disconnected) && + stonith__watchdog_fencing_enabled_for_node_api(stonith_api, + our_nodename)) { + rv = pcmk__valid_sbd_timeout(value); + } + return rv; +} + +/* end stonith API client functions */ + + +/* + * stonith history synchronization + * + * Each node's fencer keeps track of a cluster-wide fencing history. When a node + * joins or leaves, we need to synchronize the history across all nodes.
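+ * + * The synchronization below is timer-driven: a short (5s) or long (30s) timer + * sets a mainloop trigger, whose callback broadcasts a history query to all + * fencers via do_stonith_history_sync().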
+ */ + +static crm_trigger_t *stonith_history_sync_trigger = NULL; +static mainloop_timer_t *stonith_history_sync_timer_short = NULL; +static mainloop_timer_t *stonith_history_sync_timer_long = NULL; + +void +te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) +{ + if (free_timers) { + mainloop_timer_del(stonith_history_sync_timer_short); + stonith_history_sync_timer_short = NULL; + mainloop_timer_del(stonith_history_sync_timer_long); + stonith_history_sync_timer_long = NULL; + } else { + mainloop_timer_stop(stonith_history_sync_timer_short); + mainloop_timer_stop(stonith_history_sync_timer_long); + } + + if (st) { + st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); + } +} + +static void +tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) +{ + te_cleanup_stonith_history_sync(st, FALSE); + crm_debug("Fence-history synced - cancel all timers"); +} + +static gboolean +stonith_history_sync_set_trigger(gpointer user_data) +{ + mainloop_set_trigger(stonith_history_sync_trigger); + return FALSE; +} + +void +te_trigger_stonith_history_sync(bool long_timeout) +{ + /* Trigger a sync in 5s, to give more nodes a chance to show up so that + * we don't create unnecessary stonith-history-sync traffic. + * + * The long timeout of 30s is a fallback: after a successful connection + * to the fencer, we wait up to 30s for the DC to trigger a history sync. + * If that doesn't happen (e.g. the fencer segfaulted and was restarted + * by pacemakerd), we trigger a sync locally. + */ + + /* Since do_stonith_history_sync() checks the fencer connection anyway, + * it is fine to leave the stonith_history_sync timers and + * stonith_history_sync_trigger around. + */ + if (stonith_history_sync_trigger == NULL) { + stonith_history_sync_trigger = + mainloop_add_trigger(G_PRIORITY_LOW, + do_stonith_history_sync, NULL); + } + + if (long_timeout) { + if (stonith_history_sync_timer_long == NULL) { + stonith_history_sync_timer_long = + mainloop_timer_add("history_sync_long", 30000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); + mainloop_timer_start(stonith_history_sync_timer_long); + } else { + if (stonith_history_sync_timer_short == NULL) { + stonith_history_sync_timer_short = + mainloop_timer_add("history_sync_short", 5000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); + mainloop_timer_start(stonith_history_sync_timer_short); + } + +} + +/* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h new file mode 100644 index 0000000..86a5050 --- /dev/null +++ b/daemons/controld/controld_fencing.h @@ -0,0 +1,38 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */ + +#ifndef CONTROLD_FENCING__H +# define CONTROLD_FENCING__H + +#include // bool +#include // pcmk__graph_t, pcmk__graph_action_t + +void controld_configure_fencing(GHashTable *options); + +// stonith fail counts +void st_fail_count_reset(const char * target); + +// stonith API client +void controld_trigger_fencer_connect(void); +void controld_disconnect_fencer(bool destroy); +int controld_execute_fence_action(pcmk__graph_t *graph, + pcmk__graph_action_t *action); +bool controld_verify_stonith_watchdog_timeout(const char *value); + +// stonith cleanup list +void add_stonith_cleanup(const char *target); +void remove_stonith_cleanup(const char *target); +void purge_stonith_cleanup(void); +void execute_stonith_cleanup(void); + +// stonith history synchronization +void te_trigger_stonith_history_sync(bool long_timeout); +void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers); + +#endif diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c new file mode 100644 index 0000000..622d1c8 --- /dev/null +++ b/daemons/controld/controld_fsa.c @@ -0,0 +1,741 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include // uint64_t +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +//! Triggers an FSA invocation +static crm_trigger_t *fsa_trigger = NULL; + +#define DOT_PREFIX "actions:trace: " +#define do_dot_log(fmt, args...) crm_trace( fmt, ##args) + +static void do_state_transition(enum crmd_fsa_state cur_state, + enum crmd_fsa_state next_state, + fsa_data_t *msg_data); + +void s_crmd_fsa_actions(fsa_data_t * fsa_data); +void log_fsa_input(fsa_data_t * stored_msg); +void init_dotfile(void); + +void +init_dotfile(void) +{ + do_dot_log(DOT_PREFIX "digraph \"g\" {"); + do_dot_log(DOT_PREFIX " size = \"30,30\""); + do_dot_log(DOT_PREFIX " graph ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " node ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " shape = \"ellipse\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " edge ["); + do_dot_log(DOT_PREFIX " fontsize = \"12\""); + do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); + do_dot_log(DOT_PREFIX " fontcolor = \"black\""); + do_dot_log(DOT_PREFIX " color = \"black\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX "// special nodes"); + do_dot_log(DOT_PREFIX " \"S_PENDING\" "); + do_dot_log(DOT_PREFIX " ["); + do_dot_log(DOT_PREFIX " color = \"blue\""); + do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); + do_dot_log(DOT_PREFIX " ["); + do_dot_log(DOT_PREFIX " color = \"red\""); + do_dot_log(DOT_PREFIX " fontcolor = \"red\""); + do_dot_log(DOT_PREFIX " ]"); + do_dot_log(DOT_PREFIX "// DC only nodes"); + do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = 
\"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); + do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); +} + +static void +do_fsa_action(fsa_data_t * fsa_data, long long an_action, + void (*function) (long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) +{ + controld_clear_fsa_action_flags(an_action); + crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); + function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state, + fsa_data->fsa_input, fsa_data); +} + +static const uint64_t startup_actions = + A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | + A_STARTED | A_CL_JOIN_QUERY; + +// A_LOG, A_WARN, A_ERROR +void +do_log(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data) +{ + unsigned log_type = LOG_TRACE; + + if (action & A_LOG) { + log_type = LOG_INFO; + } else if (action & A_WARN) { + log_type = LOG_WARNING; + } else if (action & A_ERROR) { + log_type = LOG_ERR; + } + + do_crm_log(log_type, "Input %s received in state %s from %s", + fsa_input2string(msg_data->fsa_input), + fsa_state2string(cur_state), msg_data->origin); + + if (msg_data->data_type == fsa_dt_ha_msg) { + ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); + + crm_log_xml_debug(input->msg, __func__); + + } else if (msg_data->data_type == fsa_dt_xml) { + xmlNode *input = fsa_typed_data(msg_data->data_type); + + crm_log_xml_debug(input, __func__); + + } else if (msg_data->data_type == fsa_dt_lrm) { + lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); + + do_crm_log(log_type, + "Resource %s: Call ID %d returned %d (%d)." + " New status if rc=0: %s", + input->rsc_id, input->call_id, input->rc, + input->op_status, (char *)input->user_data); + } +} + +/*! + * \internal + * \brief Initialize the FSA trigger + */ +void +controld_init_fsa_trigger(void) +{ + fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); +} + +/*! + * \internal + * \brief Destroy the FSA trigger + */ +void +controld_destroy_fsa_trigger(void) +{ + // This basically will not work, since mainloop has a reference to it + mainloop_destroy_trigger(fsa_trigger); + fsa_trigger = NULL; +} + +/*! 
+ * \internal + * \brief Trigger an FSA invocation + * + * \param[in] fn Calling function name + * \param[in] line Line number where call occurred + */ +void +controld_trigger_fsa_as(const char *fn, int line) +{ + if (fsa_trigger != NULL) { + crm_trace("%s:%d - Triggered FSA invocation", fn, line); + mainloop_set_trigger(fsa_trigger); + } +} + +enum crmd_fsa_state +s_crmd_fsa(enum crmd_fsa_cause cause) +{ + controld_globals_t *globals = &controld_globals; + fsa_data_t *fsa_data = NULL; + uint64_t register_copy = controld_globals.fsa_input_register; + uint64_t new_actions = A_NOTHING; + enum crmd_fsa_state last_state; + + crm_trace("FSA invoked with Cause: %s\tState: %s", + fsa_cause2string(cause), + fsa_state2string(globals->fsa_state)); + + fsa_dump_actions(controld_globals.fsa_actions, "Initial"); + + controld_clear_global_flags(controld_fsa_is_stalled); + if ((controld_globals.fsa_message_queue == NULL) + && (controld_globals.fsa_actions != A_NOTHING)) { + /* fake the first message so we can get into the loop */ + fsa_data = calloc(1, sizeof(fsa_data_t)); + fsa_data->fsa_input = I_NULL; + fsa_data->fsa_cause = C_FSA_INTERNAL; + fsa_data->origin = __func__; + fsa_data->data_type = fsa_dt_none; + controld_globals.fsa_message_queue + = g_list_append(controld_globals.fsa_message_queue, fsa_data); + fsa_data = NULL; + } + while ((controld_globals.fsa_message_queue != NULL) + && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + crm_trace("Checking messages (%d remaining)", + g_list_length(controld_globals.fsa_message_queue)); + + fsa_data = get_message(); + if(fsa_data == NULL) { + continue; + } + + log_fsa_input(fsa_data); + + /* add any actions back to the queue */ + controld_set_fsa_action_flags(fsa_data->actions); + fsa_dump_actions(fsa_data->actions, "Restored actions"); + + /* get the next batch of actions */ + new_actions = controld_fsa_get_action(fsa_data->fsa_input); + controld_set_fsa_action_flags(new_actions); + fsa_dump_actions(new_actions, "New actions"); + + if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { + crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", + fsa_input2string(fsa_data->fsa_input), + fsa_state2string(globals->fsa_state), + fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); + } + + /* logging : *before* the state is changed */ + if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { + do_fsa_action(fsa_data, A_ERROR, do_log); + } + if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { + do_fsa_action(fsa_data, A_WARN, do_log); + } + if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { + do_fsa_action(fsa_data, A_LOG, do_log); + } + + /* update state variables */ + last_state = globals->fsa_state; + globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input); + + /* + * Remove certain actions during shutdown + */ + if ((globals->fsa_state == S_STOPPING) + || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + controld_clear_fsa_action_flags(startup_actions); + } + + /* + * Hook for change of state. + * Allows actions to be added or removed when entering a state + */ + if (last_state != globals->fsa_state) { + do_state_transition(last_state, globals->fsa_state, fsa_data); + } else { + do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" + " \tInput=%s \tOrigin=%s() \tid=%d", + fsa_state2string(globals->fsa_state), + fsa_cause2string(fsa_data->fsa_cause), + fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); + } + + /* start doing things... 
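+ * (i.e. dispatch all accumulated A_* actions for this input, in priority + * order, via s_crmd_fsa_actions() below)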
*/ + s_crmd_fsa_actions(fsa_data); + delete_fsa_input(fsa_data); + fsa_data = NULL; + } + + if ((controld_globals.fsa_message_queue != NULL) + || (controld_globals.fsa_actions != A_NOTHING) + || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + + crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s", + g_list_length(controld_globals.fsa_message_queue), + (unsigned long long) controld_globals.fsa_actions, + pcmk__btoa(pcmk_is_set(controld_globals.flags, + controld_fsa_is_stalled))); + } else { + crm_trace("Exiting the FSA"); + } + + /* cleanup inputs? */ + if (register_copy != controld_globals.fsa_input_register) { + uint64_t same = register_copy & controld_globals.fsa_input_register; + + fsa_dump_inputs(LOG_DEBUG, "Added", + controld_globals.fsa_input_register ^ same); + fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); + } + + fsa_dump_actions(controld_globals.fsa_actions, "Remaining"); + fsa_dump_queue(LOG_DEBUG); + + return globals->fsa_state; +} + +void +s_crmd_fsa_actions(fsa_data_t * fsa_data) +{ + /* + * Process actions in order of priority but do only one + * action at a time to avoid complicating the ordering. + */ + CRM_CHECK(fsa_data != NULL, return); + while ((controld_globals.fsa_actions != A_NOTHING) + && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { + + /* regular action processing in order of action priority + * + * Make sure all actions that connect to required systems + * are performed first + */ + if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { + do_fsa_action(fsa_data, A_ERROR, do_log); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { + do_fsa_action(fsa_data, A_WARN, do_log); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { + do_fsa_action(fsa_data, A_LOG, do_log); + + /* get out of here NOW! 
before anything worse happens */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) { + do_fsa_action(fsa_data, A_EXIT_1, do_exit); + + /* sub-system restart */ + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_LRM_RECONNECT)) { + do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_CIB_RESTART)) { + do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_PE_RESTART)) { + do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); + + } else if (pcmk_all_flags_set(controld_globals.fsa_actions, + O_TE_RESTART)) { + do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); + + /* essential start tasks */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) { + do_fsa_action(fsa_data, A_STARTUP, do_startup); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) { + do_fsa_action(fsa_data, A_CIB_START, do_cib_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) { + do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) { + do_fsa_action(fsa_data, A_READCONFIG, do_read_config); + + /* sub-system start/connect */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) { + do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) { + do_fsa_action(fsa_data, A_TE_START, do_te_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) { + do_fsa_action(fsa_data, A_PE_START, do_pe_control); + + /* Timers */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) { + do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_INTEGRATE_TIMER_STOP)) { + do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_INTEGRATE_TIMER_START)) { + do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_FINALIZE_TIMER_STOP)) { + do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_FINALIZE_TIMER_START)) { + do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); + + /* + * Highest priority actions + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) { + do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) { + do_fsa_action(fsa_data, A_RECOVER, do_recover); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_RESULT)) { + do_fsa_action(fsa_data, A_CL_JOIN_RESULT, + do_cl_join_finalize_respond); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_REQUEST)) { + do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, + do_cl_join_offer_respond); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) { + do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) { + do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_COUNT)) { + do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) { + 
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); + + /* + * High priority actions + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) { + do_fsa_action(fsa_data, A_STARTED, do_started); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) { + do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_TIMER_START)) { + do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); + + /* + * Medium priority actions + * - Membership + */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) { + do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) { + do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) { + do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_CHECK)) { + do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_ELECTION_START)) { + do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_OFFER_ALL)) { + do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_OFFER_ONE)) { + do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_PROCESS_REQ)) { + do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, + do_dc_join_filter_offer); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_PROCESS_ACK)) { + do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_DC_JOIN_FINALIZE)) { + do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); + + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_CL_JOIN_ANNOUNCE)) { + do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); + + /* + * Low(er) priority actions + * Make sure the CIB is always updated before invoking the + * scheduler, and the scheduler before the transition engine. 
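+ * (This is why A_LRM_INVOKE and A_PE_INVOKE are checked before A_TE_INVOKE + * in the chain below.)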
+ */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) { + do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) { + do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) { + do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) { + do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) { + do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); + + /* Shutdown actions */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) { + do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) { + do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) { + do_fsa_action(fsa_data, A_TE_STOP, do_te_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) { + do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); + } else if (pcmk_is_set(controld_globals.fsa_actions, + A_LRM_DISCONNECT)) { + do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); + + } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) { + do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) { + do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); + } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) { + do_fsa_action(fsa_data, A_STOP, do_stop); + + /* exit gracefully */ + } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) { + do_fsa_action(fsa_data, A_EXIT_0, do_exit); + + /* Error checking and reporting */ + } else { + crm_err("Action %s not supported "CRM_XS" %#llx", + fsa_action2string(controld_globals.fsa_actions), + (unsigned long long) controld_globals.fsa_actions); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, + __func__); + } + } +} + +void +log_fsa_input(fsa_data_t * stored_msg) +{ + CRM_ASSERT(stored_msg); + crm_trace("Processing queued input %d", stored_msg->id); + if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { + crm_trace("FSA processing LRM callback from %s", stored_msg->origin); + + } else if (stored_msg->data == NULL) { + crm_trace("FSA processing input from %s", stored_msg->origin); + + } else { + ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, + __func__); + + crm_trace("FSA processing XML message from %s", stored_msg->origin); + crm_log_xml_trace(ha_input->xml, "FSA message data"); + } +} + +static void +check_join_counts(fsa_data_t *msg_data) +{ + int count; + guint npeers; + + count = crmd_join_phase_count(crm_join_finalized); + if (count > 0) { + crm_err("%d cluster node%s failed to confirm join", + count, pcmk__plural_s(count)); + crmd_join_phase_log(LOG_NOTICE); + return; + } + + npeers = crm_active_peers(); + count = crmd_join_phase_count(crm_join_confirmed); + if (count == npeers) { + if (npeers == 1) { + crm_debug("Sole active cluster node is fully joined"); + } else { + crm_debug("All %d active cluster nodes are fully joined", count); + } + + } else if (count > npeers) { + crm_err("New election needed because more nodes confirmed join " + "than are in membership (%d > %u)", count, npeers); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + + } else if (controld_globals.membership_id != crm_peer_seq) { + 
crm_info("New join needed because membership changed (%llu -> %llu)", + controld_globals.membership_id, crm_peer_seq); + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + + } else { + crm_warn("Only %d of %u active cluster nodes fully joined " + "(%d did not respond to offer)", + count, npeers, crmd_join_phase_count(crm_join_welcomed)); + } +} + +static void +do_state_transition(enum crmd_fsa_state cur_state, + enum crmd_fsa_state next_state, fsa_data_t *msg_data) +{ + int level = LOG_INFO; + int count = 0; + gboolean clear_recovery_bit = TRUE; +#if 0 + uint64_t original_fsa_actions = controld_globals.fsa_actions; +#endif + + enum crmd_fsa_cause cause = msg_data->fsa_cause; + enum crmd_fsa_input current_input = msg_data->fsa_input; + + const char *state_from = fsa_state2string(cur_state); + const char *state_to = fsa_state2string(next_state); + const char *input = fsa_input2string(current_input); + + CRM_LOG_ASSERT(cur_state != next_state); + + do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", + state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); + + if (cur_state == S_IDLE || next_state == S_IDLE) { + level = LOG_NOTICE; + } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { + level = LOG_NOTICE; + } else if (cur_state == S_ELECTION) { + level = LOG_NOTICE; + } else if (cur_state == S_STARTING) { + level = LOG_NOTICE; + } else if (next_state == S_RECOVERY) { + level = LOG_WARNING; + } + + do_crm_log(level, "State transition %s -> %s " + CRM_XS " input=%s cause=%s origin=%s", + state_from, state_to, input, fsa_cause2string(cause), + msg_data->origin); + + if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { + controld_stop_current_election_timeout(); + } +#if 0 + if ((controld_globals.fsa_input_register & R_SHUTDOWN)) { + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + } +#endif + if (next_state == S_INTEGRATION) { + controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START); + } else { + controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP); + } + + if (next_state == S_FINALIZE_JOIN) { + controld_set_fsa_action_flags(A_FINALIZE_TIMER_START); + } else { + controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP); + } + + if (next_state != S_PENDING) { + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + } + if (next_state != S_IDLE) { + controld_stop_recheck_timer(); + } + + if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { + populate_cib_nodes(node_update_quick|node_update_all, __func__); + } + + switch (next_state) { + case S_PENDING: + { + cib_t *cib_conn = controld_globals.cib_conn; + cib_conn->cmds->set_secondary(cib_conn, cib_scope_local); + } + update_dc(NULL); + break; + + case S_ELECTION: + update_dc(NULL); + break; + + case S_NOT_DC: + controld_reset_counter_election_timer(); + purge_stonith_cleanup(); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); + controld_set_fsa_action_flags(A_SHUTDOWN_REQ); + } + CRM_LOG_ASSERT(controld_globals.dc_name != NULL); + if (controld_globals.dc_name == NULL) { + crm_err("Reached S_NOT_DC without a DC" " being recorded"); + } + break; + + case S_RECOVERY: + clear_recovery_bit = FALSE; + break; + + case S_FINALIZE_JOIN: + CRM_LOG_ASSERT(AM_I_DC); + if (cause == C_TIMER_POPPED) { + crm_warn("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } + count = crmd_join_phase_count(crm_join_welcomed); + if (count > 0) { + crm_warn("%d 
cluster node%s failed to respond to join offer", + count, pcmk__plural_s(count)); + crmd_join_phase_log(LOG_NOTICE); + + } else { + crm_debug("All cluster nodes (%d) responded to join offer", + crmd_join_phase_count(crm_join_integrated)); + } + break; + + case S_POLICY_ENGINE: + controld_reset_counter_election_timer(); + CRM_LOG_ASSERT(AM_I_DC); + if (cause == C_TIMER_POPPED) { + crm_info("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } + check_join_counts(msg_data); + break; + + case S_STOPPING: + case S_TERMINATE: + /* possibly redundant */ + controld_set_fsa_input_flags(R_SHUTDOWN); + break; + + case S_IDLE: + CRM_LOG_ASSERT(AM_I_DC); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("(Re)Issuing shutdown request now" " that we are the DC"); + controld_set_fsa_action_flags(A_SHUTDOWN_REQ); + } + controld_start_recheck_timer(); + break; + + default: + break; + } + + if (clear_recovery_bit && next_state != S_PENDING) { + controld_clear_fsa_action_flags(A_RECOVER); + } else if (clear_recovery_bit == FALSE) { + controld_set_fsa_action_flags(A_RECOVER); + } + +#if 0 + if (original_fsa_actions != controld_globals.fsa_actions) { + fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions, + "New actions"); + } +#endif +} diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h new file mode 100644 index 0000000..2b79f07 --- /dev/null +++ b/daemons/controld/controld_fsa.h @@ -0,0 +1,694 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CRMD_FSA__H +# define CRMD_FSA__H + +# include +# include +# include +# include +# include +# include +# include + +/*! 
States the controller can be in */ +enum crmd_fsa_state { + S_IDLE = 0, /* Nothing happening */ + + S_ELECTION, /* Take part in the election algorithm as + * described below + */ + S_INTEGRATION, /* integrate the status of new nodes (which is + * all of them if we have just been elected DC) + * to form a complete and up-to-date picture of + * the CIB + */ + S_FINALIZE_JOIN, /* sync the CIB to the joining nodes and wait + * for them to confirm membership, completing + * the join process + */ + S_NOT_DC, /* we are in non-DC mode */ + S_POLICY_ENGINE, /* Determine next stable state of the cluster */ + S_RECOVERY, /* Something bad happened, check everything is ok + * before continuing and attempt to recover if + * required + */ + S_RELEASE_DC, /* we were the DC, but now we aren't anymore, + * possibly by our own request, and we should + * release all unnecessary sub-systems, finish + * any pending actions, do general cleanup and + * unset anything that makes us think we are + * special :) + */ + S_STARTING, /* we are just starting out */ + S_PENDING, /* we are not a full/active member yet */ + S_STOPPING, /* We are in the final stages of shutting down */ + S_TERMINATE, /* We are going to shut down, this is the equivalent + * of "Sending TERM signal to all processes" in Linux + * and in worst case scenarios could be considered + * a self STONITH + */ + S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable + * state of the cluster a reality + */ + + S_HALT, /* Freeze - don't do anything + * Something bad happened that needs the admin to fix + * Wait for I_ELECTION + */ + + /* ----------- Last state found in table is above ---------- */ + S_ILLEGAL /* This is an illegal FSA state */ + /* (must be last) */ +}; + +# define MAXSTATE S_ILLEGAL + +/* + Once we start and do some basic sanity checks, we go into the + S_NOT_DC state and await instructions from the DC or input from + the cluster layer which indicates the election algorithm needs to run. + + If the election algorithm is triggered, we enter the S_ELECTION state + from where we can either go back to the S_NOT_DC state or progress + to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC + but aren't anymore). See the libcrmcluster API documentation for more + information about the election algorithm. + + Once the election is complete, if we are the DC, we enter the + S_INTEGRATION state which is a DC-in-waiting style state. We are + the DC, but we shouldn't do anything yet because we may not have an + up-to-date picture of the cluster. There may of course be times + when this fails, so we should go back to the S_RECOVERY stage and + check everything is ok. We may also end up here if a new node came + online, since each node is authoritative about itself, and we would want + to incorporate its information into the CIB. + + Once we have the latest CIB, we then enter the S_POLICY_ENGINE state + where we invoke the scheduler. It is possible that, between + invoking the scheduler and receiving an answer, we receive + more input. In this case, we would discard the original result and + invoke it again. + + Once we are satisfied with the output from the scheduler, we + enter S_TRANSITION_ENGINE and feed the scheduler's output to the + Transition Engine, which attempts to make the scheduler's + calculation a reality. If the transition completes successfully, + we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the + current unstable state and try again.
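+ + (In short, a healthy DC cycles S_POLICY_ENGINE -> S_TRANSITION_ENGINE -> + S_IDLE, returning to S_POLICY_ENGINE whenever new input arrives.)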
+ + Of course, we may be asked to shut down at any time; however, we must + progress to S_NOT_DC before doing so. Once we have handed over DC + duties to another node, we can then shut down like everyone else, + that is, by asking the DC for permission and waiting for it to take all + our resources away. + + The case where we are the DC and the only node in the cluster is a + special case and handled as an escalation which takes us to + S_SHUTDOWN. Similarly, if any other point in the shutdown + fails or stalls, this is escalated and we end up in S_TERMINATE. + + At any point, the controller can relay messages for its subsystems, + but outbound messages (from subsystems) should probably be blocked + until S_INTEGRATION (for the DC) or the join protocol has + completed (for non-DC controllers). +*/ + +/*====================================== + * + * Inputs/Events/Stimuli to be given to the finite state machine + * + * Some of these are true events, and others are synthesised based on + * the "register" (see below) and the contents or source of messages. + * + * The machine keeps processing until receiving I_NULL + * + *======================================*/ +enum crmd_fsa_input { +/* 0 */ + I_NULL, /* Nothing happened */ +/* 1 */ + + I_CIB_OP, /* An update to the CIB occurred */ + I_CIB_UPDATE, /* An update to the CIB occurred */ + I_DC_TIMEOUT, /* We have lost communication with the DC */ + I_ELECTION, /* Someone started an election */ + I_PE_CALC, /* The scheduler needs to be invoked */ + I_RELEASE_DC, /* The election completed and we were not + * elected, but we were the DC beforehand + */ + I_ELECTION_DC, /* The election completed and we were (re-)elected + * DC + */ + I_ERROR, /* Something bad happened (more serious than + * I_FAIL) and may not have been due to the action + * being performed. For example, we may have lost + * our connection to the CIB. + */ +/* 9 */ + I_FAIL, /* The action failed to complete successfully */ + I_INTEGRATED, + I_FINALIZED, + I_NODE_JOIN, /* A node has entered the cluster */ + I_NOT_DC, /* We are not and were not the DC before or after + * the current operation or state + */ + I_RECOVERED, /* The recovery process completed successfully */ + I_RELEASE_FAIL, /* We could not give up DC status for some reason + */ + I_RELEASE_SUCCESS, /* We are no longer the DC */ + I_RESTART, /* The current set of actions needs to be + * restarted + */ + I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action + * is required of us, e.g.
ping + */ +/* 20 */ + I_ROUTER, /* Do our job as router and forward this to the + * right place + */ + I_SHUTDOWN, /* We are asking to shut down */ + I_STOP, /* We have been told to shut down */ + I_TERMINATE, /* Actually exit */ + I_STARTUP, + I_PE_SUCCESS, /* The action completed successfully */ + + I_JOIN_OFFER, /* The DC is offering membership */ + I_JOIN_REQUEST, /* The client is requesting membership */ + I_JOIN_RESULT, /* If not the DC: The result of a join request + * Else: A client is responding with its local state info + */ + + I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" + * and until it does, we can't do anything else + */ + + I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ + + I_LRM_EVENT, + +/* 30 */ + I_PENDING, + I_HALT, + + /* ------------ Last input found in table is above ----------- */ + I_ILLEGAL /* This is an illegal value for an FSA input */ + /* (must be last) */ +}; + +# define MAXINPUT I_ILLEGAL + +# define I_MESSAGE I_ROUTER + +/*====================================== + * + * actions + * + * Some of the actions below will always occur together for now, but this may + * not always be the case, so they are split up so that they can easily be + * called independently in the future, if necessary. + * + * For example, separating A_LRM_CONNECT from A_STARTUP might be useful + * if we ever try to recover from a faulty or disconnected executor. + * + *======================================*/ + + /* Don't do anything */ +# define A_NOTHING 0x0000000000000000ULL + +/* -- Startup actions -- */ + /* Hook to perform any actions (other than connecting to other daemons) + * that might be needed as part of the startup. + */ +# define A_STARTUP 0x0000000000000001ULL + /* Hook to perform any actions that might be needed + * after startup has completed successfully.
+ */ +# define A_STARTED 0x0000000000000002ULL + /* Connect to cluster layer */ +# define A_HA_CONNECT 0x0000000000000004ULL +# define A_HA_DISCONNECT 0x0000000000000008ULL + +# define A_INTEGRATE_TIMER_START 0x0000000000000010ULL +# define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL +# define A_FINALIZE_TIMER_START 0x0000000000000040ULL +# define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL + +/* -- Election actions -- */ +# define A_DC_TIMER_START 0x0000000000000100ULL +# define A_DC_TIMER_STOP 0x0000000000000200ULL +# define A_ELECTION_COUNT 0x0000000000000400ULL +# define A_ELECTION_VOTE 0x0000000000000800ULL + +# define A_ELECTION_START 0x0000000000001000ULL + +/* -- Message processing -- */ + /* Process the queue of requests */ +# define A_MSG_PROCESS 0x0000000000002000ULL + /* Send the message to the correct recipient */ +# define A_MSG_ROUTE 0x0000000000004000ULL + + /* Send a welcome message to new node(s) */ +# define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL + +/* -- Server Join protocol actions -- */ + /* Send a welcome message to all nodes */ +# define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL + /* Process the remote node's ack of our join message */ +# define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL + /* Send out the results of the Join phase */ +# define A_DC_JOIN_FINALIZE 0x0000000000040000ULL + /* Process a node's ack of the Join results */ +# define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL + +/* -- Client Join protocol actions -- */ +# define A_CL_JOIN_QUERY 0x0000000000100000ULL +# define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL + /* Request membership to the DC list */ +# define A_CL_JOIN_REQUEST 0x0000000000400000ULL + /* Did the DC accept or reject the request? */ +# define A_CL_JOIN_RESULT 0x0000000000800000ULL + +/* -- Recovery, DC start/stop -- */ + /* Something bad happened, try to recover */ +# define A_RECOVER 0x0000000001000000ULL + /* Hook to perform any actions (apart from starting the TE and scheduler, + * and gathering the latest CIB) that might be necessary before + * giving up the responsibilities of being the DC. + */ +# define A_DC_RELEASE 0x0000000002000000ULL + /* */ +# define A_DC_RELEASED 0x0000000004000000ULL + /* Hook to perform any actions (apart from starting the TE and scheduler, + * and gathering the latest CIB) that might be necessary before + * taking over the responsibilities of being the DC. + */ +# define A_DC_TAKEOVER 0x0000000008000000ULL + +/* -- Shutdown actions -- */ +# define A_SHUTDOWN 0x0000000010000000ULL +# define A_STOP 0x0000000020000000ULL +# define A_EXIT_0 0x0000000040000000ULL +# define A_EXIT_1 0x0000000080000000ULL + +# define A_SHUTDOWN_REQ 0x0000000100000000ULL +# define A_ELECTION_CHECK 0x0000000200000000ULL +# define A_DC_JOIN_FINAL 0x0000000400000000ULL + +/* -- CIB actions -- */ +# define A_CIB_START 0x0000020000000000ULL +# define A_CIB_STOP 0x0000040000000000ULL + +/* -- Transition Engine actions -- */ + /* Attempt to reach the newly calculated cluster state. This is + * only called once per transition (except if it is asked to + * stop the transition or start a new one). + * Once given a cluster state to reach, the TE will determine + * tasks that can be performed in parallel, execute them, wait + * for replies and then determine the next set until the new + * state is reached or no further tasks can be taken.
+ */ +# define A_TE_INVOKE 0x0000100000000000ULL +# define A_TE_START 0x0000200000000000ULL +# define A_TE_STOP 0x0000400000000000ULL +# define A_TE_CANCEL 0x0000800000000000ULL +# define A_TE_HALT 0x0001000000000000ULL + +/* -- Scheduler actions -- */ + /* Calculate the next state for the cluster. This is only + * invoked once per needed calculation. + */ +# define A_PE_INVOKE 0x0002000000000000ULL +# define A_PE_START 0x0004000000000000ULL +# define A_PE_STOP 0x0008000000000000ULL +/* -- Misc actions -- */ + /* Add a system-generated "block" so that resources aren't moved + * to, or are actively moved away from, the affected node. This + * way we can return quickly even if busy with other things. + */ +# define A_NODE_BLOCK 0x0010000000000000ULL + /* Update our information in the local CIB */ +# define A_UPDATE_NODESTATUS 0x0020000000000000ULL +# define A_READCONFIG 0x0080000000000000ULL + +/* -- LRM Actions -- */ + /* Connect to pacemaker-execd */ +# define A_LRM_CONNECT 0x0100000000000000ULL + /* Disconnect from pacemaker-execd */ +# define A_LRM_DISCONNECT 0x0200000000000000ULL +# define A_LRM_INVOKE 0x0400000000000000ULL +# define A_LRM_EVENT 0x0800000000000000ULL + +/* -- Logging actions -- */ +# define A_LOG 0x1000000000000000ULL +# define A_ERROR 0x2000000000000000ULL +# define A_WARN 0x4000000000000000ULL + +# define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) +# define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) +# define O_PE_RESTART (A_PE_START|A_PE_STOP) +# define O_TE_RESTART (A_TE_START|A_TE_STOP) +# define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) +# define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT) +# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) +/*====================================== + * + * "register" contents + * + * Things we may want to remember regardless of which state we are in. + * + * These also count as inputs for synthesizing I_* + * + *======================================*/ +# define R_THE_DC 0x00000001ULL + /* Are we the DC? */ +# define R_STARTING 0x00000002ULL + /* Are we starting up? */ +# define R_SHUTDOWN 0x00000004ULL + /* Are we trying to shut down? */ +# define R_STAYDOWN 0x00000008ULL + /* Should we restart? */ + +# define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ +# define R_READ_CONFIG 0x00000040ULL +# define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked? + +# define R_CIB_CONNECTED 0x00000100ULL + /* Is the CIB connected? */ +# define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected? +# define R_TE_CONNECTED 0x00000400ULL + /* Is the Transition Engine connected? */ +# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected? + +# define R_CIB_REQUIRED 0x00001000ULL + /* Is the CIB required? */ +# define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required? +# define R_TE_REQUIRED 0x00004000ULL + /* Is the Transition Engine required? */ +# define R_ST_REQUIRED 0x00008000ULL + /* Is the Stonith daemon required? */ + +# define R_CIB_DONE 0x00010000ULL + /* Have we calculated the CIB? */ +# define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ + +# define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */ +# define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */ + +# define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ + +# define R_REQ_PEND 0x01000000ULL + /* Are there Requests waiting for + processing?
*/ +# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler? +# define R_TE_PEND 0x04000000ULL + /* Has the TE been invoked and we're + awaiting completion? */ +# define R_RESP_PEND 0x08000000ULL + /* Do we have clients waiting on a + response? if so perhaps we shouldn't + stop yet */ + +# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all + * resources in preparation for + * shutting down */ + +# define R_IN_RECOVERY 0x80000000ULL + +#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID) + +enum crmd_fsa_cause { + C_UNKNOWN = 0, + C_STARTUP, + C_IPC_MESSAGE, + C_HA_MESSAGE, + C_CRMD_STATUS_CALLBACK, + C_LRM_OP_CALLBACK, + C_TIMER_POPPED, + C_SHUTDOWN, + C_FSA_INTERNAL, +}; + +enum fsa_data_type { + fsa_dt_none, + fsa_dt_ha_msg, + fsa_dt_xml, + fsa_dt_lrm, +}; + +typedef struct fsa_data_s fsa_data_t; +struct fsa_data_s { + int id; + enum crmd_fsa_input fsa_input; + enum crmd_fsa_cause fsa_cause; + uint64_t actions; + const char *origin; + void *data; + enum fsa_data_type data_type; +}; + +#define controld_set_fsa_input_flags(flags_to_set) do { \ + controld_globals.fsa_input_register \ + = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ + "FSA input", "controller", \ + controld_globals.fsa_input_register, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_fsa_input_flags(flags_to_clear) do { \ + controld_globals.fsa_input_register \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ + "FSA input", "controller", \ + controld_globals.fsa_input_register, \ + (flags_to_clear), \ + #flags_to_clear); \ + } while (0) + +#define controld_set_fsa_action_flags(flags_to_set) do { \ + controld_globals.fsa_actions \ + = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \ + "FSA action", "controller", \ + controld_globals.fsa_actions, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_fsa_action_flags(flags_to_clear) do { \ + controld_globals.fsa_actions \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \ + "FSA action", "controller", \ + controld_globals.fsa_actions, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +// This should be moved elsewhere +xmlNode *controld_query_executor_state(void); + +const char *fsa_input2string(enum crmd_fsa_input input); +const char *fsa_state2string(enum crmd_fsa_state state); +const char *fsa_cause2string(enum crmd_fsa_cause cause); +const char *fsa_action2string(long long action); + +enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause); + +enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input); + +uint64_t controld_fsa_get_action(enum crmd_fsa_input input); + +void controld_init_fsa_trigger(void); +void controld_destroy_fsa_trigger(void); + +void free_max_generation(void); + +# define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC) +# define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__) + +void controld_trigger_fsa_as(const char *fn, int line); + +/* A_READCONFIG */ +void do_read_config(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_PE_INVOKE */ +void do_pe_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_LOG */ +void do_log(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* 
A_STARTUP */ +void do_startup(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_CIB_START, STOP, RESTART */ +void do_cib_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_HA_CONNECT */ +void do_ha_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_LRM_CONNECT */ +void do_lrm_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_PE_START, STOP, RESTART */ +void do_pe_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_TE_START, STOP, RESTART */ +void do_te_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_STARTED */ +void do_started(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_MSG_ROUTE */ +void do_msg_route(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_RECOVER */ +void do_recover(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_ELECTION_VOTE */ +void do_election_vote(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_ELECTION_COUNT */ +void do_election_count_vote(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, + fsa_data_t *msg_data); + +/* A_ELECTION_CHECK */ +void do_election_check(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_TIMER_STOP */ +void do_timer_control(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_TAKEOVER */ +void do_dc_takeover(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_RELEASE */ +void do_dc_release(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_OFFER_ALL */ +void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_OFFER_ONE */ +void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_ACK */ +void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_REQ */ +void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, + fsa_data_t *msg_data); + +/* A_DC_JOIN_FINALIZE */ +void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, + enum 
crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_QUERY */ +/* is there a DC out there? */ +void do_cl_join_query(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_ANNOUNCE */ +void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); + +/* A_CL_JOIN_REQUEST */ +void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, + fsa_data_t *msg_data); + +/* A_CL_JOIN_RESULT */ +void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, + fsa_data_t *msg_data); + +/* A_LRM_INVOKE */ +void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_LRM_EVENT */ +void do_lrm_event(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_TE_INVOKE, A_TE_CANCEL */ +void do_te_invoke(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_SHUTDOWN_REQ */ +void do_shutdown_req(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_SHUTDOWN */ +void do_shutdown(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_STOP */ +void do_stop(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_EXIT_0, A_EXIT_1 */ +void do_exit(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input cur_input, fsa_data_t *msg_data); + +/* A_DC_JOIN_FINAL */ +void do_dc_join_final(long long action, enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t *msg_data); +#endif diff --git a/daemons/controld/controld_globals.h b/daemons/controld/controld_globals.h new file mode 100644 index 0000000..eff1607 --- /dev/null +++ b/daemons/controld/controld_globals.h @@ -0,0 +1,143 @@ +/* + * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef CONTROLD_GLOBALS__H +# define CONTROLD_GLOBALS__H + +#include // pcmk__output_t, etc. + +#include // uint32_t, uint64_t +#include // GList, GMainLoop +#include // cib_t +#include // pcmk__graph_t +#include // enum crmd_fsa_state + +typedef struct { + // Booleans + + //! Group of \p controld_flags values + uint32_t flags; + + + // Controller FSA + + //! FSA state + enum crmd_fsa_state fsa_state; + + //! FSA actions (group of \p A_* flags) + uint64_t fsa_actions; + + //! FSA input register contents (group of \p R_* flags) + uint64_t fsa_input_register; + + //! FSA message queue + GList *fsa_message_queue; + + + // CIB + + //! Connection to the CIB + cib_t *cib_conn; + + //! 
CIB connection's client ID + const char *cib_client_id; + + + // Scheduler + + //! Reference of the scheduler request being waited on + char *fsa_pe_ref; + + + // Transitioner + + //! Transitioner UUID + char *te_uuid; + + //! Graph of transition currently being processed + pcmk__graph_t *transition_graph; + + + // Logging + + //! Output object for controller log messages + pcmk__output_t *logger_out; + + + // Other + + //! Cluster name + char *cluster_name; + + //! Designated controller name + char *dc_name; + + //! Designated controller's Pacemaker version + char *dc_version; + + //! Local node's node name + char *our_nodename; + + //! Local node's UUID + char *our_uuid; + + //! Last saved cluster communication layer membership ID + unsigned long long membership_id; + + //! Max lifetime (in seconds) of a resource's shutdown lock to a node + guint shutdown_lock_limit; + + //! Main event loop + GMainLoop *mainloop; +} controld_globals_t; + +extern controld_globals_t controld_globals; + +/*! + * \internal + * \enum controld_flags + * \brief Bit flags to store various controller state and configuration info + */ +enum controld_flags { + //! The DC left in a membership change that is being processed + controld_dc_left = (1 << 0), + + //! The FSA is stalled waiting for further input + controld_fsa_is_stalled = (1 << 1), + + //! The local node has been in a quorate partition at some point + controld_ever_had_quorum = (1 << 2), + + //! The local node is currently in a quorate partition + controld_has_quorum = (1 << 3), + + //! Panic the local node if it loses quorum + controld_no_quorum_suicide = (1 << 4), + + //! Lock resources to the local node when it shuts down cleanly + controld_shutdown_lock_enabled = (1 << 5), +}; + +# define controld_set_global_flags(flags_to_set) do { \ + controld_globals.flags = pcmk__set_flags_as(__func__, __LINE__, \ + LOG_TRACE, \ + "Global", "controller", \ + controld_globals.flags, \ + (flags_to_set), \ + #flags_to_set); \ + } while (0) + +# define controld_clear_global_flags(flags_to_clear) do { \ + controld_globals.flags \ + = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Global", \ + "controller", controld_globals.flags, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +#endif // ifndef CONTROLD_GLOBALS__H diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c new file mode 100644 index 0000000..da6a9d6 --- /dev/null +++ b/daemons/controld/controld_join_client.c @@ -0,0 +1,366 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include + +void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); + +extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); + +/*! + * \internal + * \brief Remember if DC is shutting down as we join + * + * If we're joining while the current DC is shutting down, update its expected + * state, so we don't fence it if we become the new DC. (We weren't a peer + * when it broadcast its shutdown request.) 
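+ *
+ * For example (hypothetical node names): if DC "node1" broadcast a shutdown
+ * request while the local node was still joining, node1's expected state is
+ * recorded as down here, so that becoming the new DC later does not lead to
+ * fencing node1 merely for leaving.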
+ * + * \param[in] msg A join message from the DC + */ +static void +update_dc_expected(const xmlNode *msg) +{ + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { + crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +} + +/* A_CL_JOIN_QUERY */ +/* Is there a DC out there? */ +void +do_cl_join_query(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); + + sleep(1); // Give the cluster layer time to propagate to the DC + update_dc(NULL); /* Unset any existing value so that the result is not discarded */ + crm_debug("Querying for a DC"); + send_cluster_message(NULL, crm_msg_crmd, req, FALSE); + free_xml(req); +} + +/* A_CL_JOIN_ANNOUNCE */ + +/* This is a workaround for the fact that we may not be around, or may + * otherwise be unable to reply, when the DC sends out A_DC_JOIN_OFFER_ALL + */ +void +do_cl_join_announce(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + /* Don't announce unless we're in S_PENDING */ + if (cur_state != S_PENDING) { + crm_warn("Not announcing cluster join because in state %s", + fsa_state2string(cur_state)); + return; + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { + /* send as a broadcast */ + xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); + + crm_debug("Announcing availability"); + update_dc(NULL); + send_cluster_message(NULL, crm_msg_crmd, req, FALSE); + free_xml(req); + + } else { + /* Delay announce until we have finished local startup */ + crm_warn("Delaying announce of cluster join until local startup is complete"); + return; + } +} + +static int query_call_id = 0; + +/* A_CL_JOIN_REQUEST */ +/* a.k.a. accept the welcome offer */ +void +do_cl_join_offer_respond(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *welcome_from; + const char *join_id; + + CRM_CHECK(input != NULL, return); + +#if 0 + if (we are sick) { + log error; + + /* save the request for later? 
*/ + return; + } +#endif + + welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); + join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); + crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", + welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); + + /* we only ever want the last one */ + if (query_call_id > 0) { + crm_trace("Cancelling previous join query: %d", query_call_id); + remove_cib_op_callback(query_call_id, FALSE); + query_call_id = 0; + } + + if (update_dc(input->msg) == FALSE) { + crm_warn("Discarding cluster join offer from node %s (expected %s)", + welcome_from, controld_globals.dc_name); + return; + } + + update_dc_expected(input->msg); + + query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL, + cib_scope_local|cib_no_children); + fsa_register_cib_callback(query_call_id, strdup(join_id), + join_query_callback); + crm_trace("Registered join query callback: %d", query_call_id); + + controld_set_fsa_action_flags(A_DC_TIMER_STOP); + controld_trigger_fsa(); +} + +void +join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + char *join_id = user_data; + xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); + + CRM_LOG_ASSERT(join_id != NULL); + + if (query_call_id != call_id) { + crm_trace("Query %d superseded", call_id); + goto done; + } + + query_call_id = 0; + if(rc != pcmk_ok || output == NULL) { + crm_err("Could not retrieve version details for join-%s: %s (%d)", + join_id, pcmk_strerror(rc), rc); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + + } else if (controld_globals.dc_name == NULL) { + crm_debug("Membership is in flux, not continuing join-%s", join_id); + + } else { + xmlNode *reply = NULL; + + crm_debug("Respond to join offer join-%s from %s", + join_id, controld_globals.dc_name); + copy_in_properties(generation, output); + + reply = create_request(CRM_OP_JOIN_REQUEST, generation, + controld_globals.dc_name, CRM_SYSTEM_DC, + CRM_SYSTEM_CRMD, NULL); + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + send_cluster_message(crm_get_peer(0, controld_globals.dc_name), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } + + done: + free_xml(generation); +} + +static void +set_join_state(const char * start_state) +{ + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " + "environment", controld_globals.our_nodename, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, controld_globals.our_uuid, + NULL, NULL, NULL, "standby", "on", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " + "environment", controld_globals.our_nodename, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, controld_globals.our_uuid, + NULL, NULL, NULL, "standby", "off", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { + crm_debug("Not forcing a starting state on node %s", + controld_globals.our_nodename); + + } else { + crm_warn("Unrecognized start state '%s', using 'default' (%s)", + start_state, controld_globals.our_nodename); + } +} + +static int +update_conn_host_cache(xmlNode *node, void *userdata) +{ + const char *remote = 
crm_element_value(node, XML_ATTR_ID); + const char *conn_host = crm_element_value(node, PCMK__XA_CONN_HOST); + const char *state = crm_element_value(node, XML_CIB_TAG_STATE); + + crm_node_t *remote_peer = crm_remote_peer_get(remote); + + if (remote_peer == NULL) { + return pcmk_rc_ok; + } + + if (conn_host != NULL) { + pcmk__str_update(&remote_peer->conn_host, conn_host); + } + + if (state != NULL) { + pcmk__update_peer_state(__func__, remote_peer, state, 0); + } + + return pcmk_rc_ok; +} + +/* A_CL_JOIN_RESULT */ +/* aka. this is notification that we have (or have not) been accepted */ +void +do_cl_join_finalize_respond(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *tmp1 = NULL; + gboolean was_nack = TRUE; + static gboolean first_join = TRUE; + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); + + int join_id = -1; + const char *op = crm_element_value(input->msg, F_CRM_TASK); + const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); + + if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) { + crm_trace("Ignoring op=%s message", op); + return; + } + + /* calculate if it was an ack or a nack */ + if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) { + was_nack = FALSE; + } + + crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); + + if (was_nack) { + crm_err("Shutting down because cluster join with leader %s failed " + CRM_XS" join-%d NACK'd", welcome_from, join_id); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + controld_set_fsa_input_flags(R_STAYDOWN); + return; + } + + if (!AM_I_DC + && pcmk__str_eq(welcome_from, controld_globals.our_nodename, + pcmk__str_casei)) { + crm_warn("Discarding our own welcome - we're no longer the DC"); + return; + } + + if (update_dc(input->msg) == FALSE) { + crm_warn("Discarding %s from node %s (expected from %s)", + op, welcome_from, controld_globals.dc_name); + return; + } + + update_dc_expected(input->msg); + + /* record the node's feature set as a transient attribute */ + update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET, + CRM_FEATURE_SET, NULL, FALSE); + + /* send our status section to the DC */ + tmp1 = controld_query_executor_state(); + if (tmp1 != NULL) { + xmlNode *remotes = NULL; + xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, + controld_globals.dc_name, CRM_SYSTEM_DC, + CRM_SYSTEM_CRMD, NULL); + + crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); + + crm_debug("Confirming join-%d: sending local operation history to %s", + join_id, controld_globals.dc_name); + + /* + * If this is the node's first join since the controller started on it, + * set its initial state (standby or member) according to the user's + * preference. + * + * We do not clear the LRM history here. Even if the DC failed to do it + * when we last left, removing them here creates a race condition if the + * controller is being recovered. Instead of a list of active resources + * from the executor, we may end up with a blank status section. If we + * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous + * clones and end up with multiple active instances on the machine. 
+ */ + if (first_join + && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + + first_join = FALSE; + if (start_state) { + set_join_state(start_state); + } + } + + send_cluster_message(crm_get_peer(0, controld_globals.dc_name), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + + if (AM_I_DC == FALSE) { + register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, + __func__); + } + + free_xml(tmp1); + + /* Update the remote node cache with information about which node + * is hosting the connection. + */ + remotes = pcmk__xe_match(input->msg, XML_CIB_TAG_NODES, NULL, NULL); + if (remotes != NULL) { + pcmk__xe_foreach_child(remotes, XML_CIB_TAG_NODE, update_conn_host_cache, NULL); + } + + } else { + crm_err("Could not confirm join-%d with %s: Local operation history " + "failed", join_id, controld_globals.dc_name); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + } +} diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c new file mode 100644 index 0000000..f82b132 --- /dev/null +++ b/daemons/controld/controld_join_dc.c @@ -0,0 +1,987 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include + +#include +#include +#include + +#include + +static char *max_generation_from = NULL; +static xmlNodePtr max_generation_xml = NULL; + +/*! + * \internal + * \brief Nodes from which a CIB sync has failed since the peer joined + * + * This table is of the form (node_name -> join_id). \p node_name is + * the name of a client node from which a CIB \p sync_from() call has failed in + * \p do_dc_join_finalize() since the client joined the cluster as a peer. + * \p join_id is the ID of the join round in which the \p sync_from() failed, + * and is intended for use in nack log messages. + */ +static GHashTable *failed_sync_nodes = NULL; + +void finalize_join_for(gpointer key, gpointer value, gpointer user_data); +void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); +gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +/* Numeric counter used to identify join rounds (an unsigned int would be + * appropriate, except we get and set it in XML as int) + */ +static int current_join_id = 0; + +/*! + * \internal + * \brief Destroy the hash table containing failed sync nodes + */ +void +controld_destroy_failed_sync_table(void) +{ + if (failed_sync_nodes != NULL) { + g_hash_table_destroy(failed_sync_nodes); + failed_sync_nodes = NULL; + } +} + +/*! + * \internal + * \brief Remove a node from the failed sync nodes table if present + * + * \param[in] node_name Node name to remove + */ +void +controld_remove_failed_sync_node(const char *node_name) +{ + if (failed_sync_nodes != NULL) { + g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); + } +} + +/*! 
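A sketch of how this table is meant to be used (illustrative only; the
record/lookup helpers are the static functions defined below): */
+#if 0
+/* Hypothetical fragment: a failed sync_from() records the peer, and a later
+ * join request from that peer is then rejected with a reference to the join
+ * round in which the sync failed.
+ */
+gint failed_join_id = -1;
+
+record_failed_sync_node("node1", current_join_id);  // "node1" is made up
+if (lookup_failed_sync_node("node1", &failed_join_id) == pcmk_rc_ok) {
+    /* nack the request; failed_join_id names the failed round */
+}
+#endif
+
+/*!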
+ * \internal + * \brief Add to a hash table a node whose CIB failed to sync + * + * \param[in] node_name Name of node whose CIB failed to sync + * \param[in] join_id Join round when the failure occurred + */ +static void +record_failed_sync_node(const char *node_name, gint join_id) +{ + if (failed_sync_nodes == NULL) { + failed_sync_nodes = pcmk__strikey_table(g_free, NULL); + } + + /* If the node is already in the table then we failed to nack it during the + * filter offer step + */ + CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), + GINT_TO_POINTER(join_id))); +} + +/*! + * \internal + * \brief Look up a node name in the failed sync table + * + * \param[in] node_name Name of node to look up + * \param[out] join_id Where to store the join ID of when the sync failed + * + * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the + * node name was found, or \p pcmk_rc_node_unknown otherwise. + * \note \p *join_id is set to -1 if the node is not found. + */ +static int +lookup_failed_sync_node(const char *node_name, gint *join_id) +{ + *join_id = -1; + + if (failed_sync_nodes != NULL) { + gpointer result = g_hash_table_lookup(failed_sync_nodes, + (gchar *) node_name); + if (result != NULL) { + *join_id = GPOINTER_TO_INT(result); + return pcmk_rc_ok; + } + } + return pcmk_rc_node_unknown; +} + +void +crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) +{ + enum crm_join_phase last = 0; + + CRM_CHECK(node != NULL, return); + + /* Remote nodes do not participate in joins */ + if (pcmk_is_set(node->flags, crm_remote_node)) { + return; + } + + last = node->join; + + if(phase == last) { + crm_trace("Node %s join-%d phase is still %s " + CRM_XS " nodeid=%u source=%s", + node->uname, current_join_id, crm_join_phase_str(last), + node->id, source); + + } else if ((phase <= crm_join_none) || (phase == (last + 1))) { + node->join = phase; + crm_trace("Node %s join-%d phase is now %s (was %s) " + CRM_XS " nodeid=%u source=%s", + node->uname, current_join_id, crm_join_phase_str(phase), + crm_join_phase_str(last), node->id, source); + + } else { + crm_warn("Rejecting join-%d phase update for node %s because " + "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", + current_join_id, node->uname, crm_join_phase_str(last), + crm_join_phase_str(phase), node->id, source); + } +} + +static void +start_join_round(void) +{ + GHashTableIter iter; + crm_node_t *peer = NULL; + + crm_debug("Starting new join round join-%d", current_join_id); + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + crm_update_peer_join(__func__, peer, crm_join_none); + } + if (max_generation_from != NULL) { + free(max_generation_from); + max_generation_from = NULL; + } + if (max_generation_xml != NULL) { + free_xml(max_generation_xml); + max_generation_xml = NULL; + } + controld_clear_fsa_input_flags(R_HAVE_CIB); + controld_forget_all_cib_replace_calls(); +} + +/*! + * \internal + * \brief Create a join message from the DC + * + * \param[in] join_op Join operation name + * \param[in] host_to Recipient of message + */ +static xmlNode * +create_dc_message(const char *join_op, const char *host_to) +{ + xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, + CRM_SYSTEM_DC, NULL); + + /* Identify which election this is a part of */ + crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); + + /* Add a field specifying whether the DC is shutting down. 
This keeps the + * joining node from fencing the old DC if it becomes the new DC. + */ + pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING, + pcmk_is_set(controld_globals.fsa_input_register, + R_SHUTDOWN)); + return msg; +} + +static void +join_make_offer(gpointer key, gpointer value, gpointer user_data) +{ + xmlNode *offer = NULL; + crm_node_t *member = (crm_node_t *)value; + + CRM_ASSERT(member != NULL); + if (crm_is_peer_active(member) == FALSE) { + crm_info("Not making join-%d offer to inactive node %s", + current_join_id, + (member->uname? member->uname : "with unknown name")); + if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) { + /* You would think this is unsafe, but in fact this plus an + * active resource is what causes it to be fenced. + * + * Yes, this does mean that any node that dies at the same + * time as the old DC and is not running resources (still) + * won't be fenced. + * + * I'm not happy about this either. + */ + pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); + } + return; + } + + if (member->uname == NULL) { + crm_info("Not making join-%d offer to node uuid %s with unknown name", + current_join_id, member->uuid); + return; + } + + if (controld_globals.membership_id != crm_peer_seq) { + controld_globals.membership_id = crm_peer_seq; + crm_info("Making join-%d offers based on membership event %llu", + current_join_id, crm_peer_seq); + } + + if(user_data && member->join > crm_join_none) { + crm_info("Not making join-%d offer to already known node %s (%s)", + current_join_id, member->uname, + crm_join_phase_str(member->join)); + return; + } + + crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none); + + offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); + + // Advertise our feature set so the joining node can bail if not compatible + crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + + crm_info("Sending join-%d offer to %s", current_join_id, member->uname); + send_cluster_message(member, crm_msg_crmd, offer, TRUE); + free_xml(offer); + + crm_update_peer_join(__func__, member, crm_join_welcomed); +} + +/* A_DC_JOIN_OFFER_ALL */ +void +do_dc_join_offer_all(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + int count; + + /* Reset everyone's status back to down or in_ccm in the CIB. + * Any nodes that are active in the CIB but not in the cluster membership + * will be seen as offline by the scheduler anyway. 
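+ *
+ * (For orientation, a sketch of the normal progression: each peer's join
+ * phase advances one step at a time via crm_update_peer_join(), from
+ * crm_join_none through crm_join_welcomed, crm_join_integrated and
+ * crm_join_finalized to crm_join_confirmed, as the offer, request, ack
+ * and confirm messages are processed.)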
+ */ + current_join_id++; + start_join_round(); + + update_dc(NULL); + if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { + crm_info("A new node joined the cluster"); + } + g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); + + count = crmd_join_phase_count(crm_join_welcomed); + crm_info("Waiting on join-%d requests from %d outstanding node%s", + current_join_id, count, pcmk__plural_s(count)); + + // Don't waste time by invoking the scheduler yet +} + +/* A_DC_JOIN_OFFER_ONE */ +void +do_dc_join_offer_one(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_node_t *member; + ha_msg_input_t *welcome = NULL; + int count; + const char *join_to = NULL; + + if (msg_data->data == NULL) { + crm_info("Making join-%d offers to any unconfirmed nodes " + "because an unknown node joined", current_join_id); + g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); + check_join_state(cur_state, __func__); + return; + } + + welcome = fsa_typed_data(fsa_dt_ha_msg); + if (welcome == NULL) { + // fsa_typed_data() already logged an error + return; + } + + join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); + if (join_to == NULL) { + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } + member = crm_get_peer(0, join_to); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in + * due course, or we can re-store the original offer on the client. + */ + + crm_update_peer_join(__func__, member, crm_join_none); + join_make_offer(NULL, member, NULL); + + /* If the offer isn't to the local node, make an offer to the local node as + * well, to ensure the correct value for max_generation_from. + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { + member = crm_get_peer(0, controld_globals.our_nodename); + join_make_offer(NULL, member, NULL); + } + + /* This was a genuine join request; cancel any existing transition and + * invoke the scheduler. 
+ */ + abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL); + + count = crmd_join_phase_count(crm_join_welcomed); + crm_info("Waiting on join-%d requests from %d outstanding node%s", + current_join_id, count, pcmk__plural_s(count)); + + // Don't waste time by invoking the scheduler yet +} + +static int +compare_int_fields(xmlNode * left, xmlNode * right, const char *field) +{ + const char *elem_l = crm_element_value(left, field); + const char *elem_r = crm_element_value(right, field); + + long long int_elem_l; + long long int_elem_r; + + pcmk__scan_ll(elem_l, &int_elem_l, -1LL); + pcmk__scan_ll(elem_r, &int_elem_r, -1LL); + + if (int_elem_l < int_elem_r) { + return -1; + + } else if (int_elem_l > int_elem_r) { + return 1; + } + + return 0; +} + +/* A_DC_JOIN_PROCESS_REQ */ +void +do_dc_join_filter_offer(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + xmlNode *generation = NULL; + + int cmp = 0; + int join_id = -1; + int count = 0; + gint value = 0; + gboolean ack_nack_bool = TRUE; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); + const char *join_version = crm_element_value(join_ack->msg, + XML_ATTR_CRM_VERSION); + crm_node_t *join_node = NULL; + + if (join_from == NULL) { + crm_err("Ignoring invalid join request without node name"); + return; + } + join_node = crm_get_peer(0, join_from); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { + crm_debug("Ignoring join-%d request from %s because we are on join-%d", + join_id, join_from, current_join_id); + check_join_state(cur_state, __func__); + return; + } + + generation = join_ack->xml; + if (max_generation_xml != NULL && generation != NULL) { + int lpc = 0; + + const char *attributes[] = { + XML_ATTR_GENERATION_ADMIN, + XML_ATTR_GENERATION, + XML_ATTR_NUMUPDATES, + }; + + for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { + cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); + } + } + + if (ref == NULL) { + ref = "none"; // for logging only + } + + if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { + crm_err("Rejecting join-%d request from node %s because we failed to " + "sync its CIB in join-%d " CRM_XS " ref=%s", + join_id, join_from, value, ref); + ack_nack_bool = FALSE; + + } else if (!crm_is_peer_active(join_node)) { + if (match_down_event(join_from) != NULL) { + /* The join request was received after the node was fenced or + * otherwise shutdown in a way that we're aware of. No need to log + * an error in this rare occurrence; we know the client was recently + * shut down, and receiving a lingering in-flight request is not + * cause for alarm. 
+ */ + crm_debug("Rejecting join-%d request from inactive node %s " + CRM_XS " ref=%s", join_id, join_from, ref); + } else { + crm_err("Rejecting join-%d request from inactive node %s " + CRM_XS " ref=%s", join_id, join_from, ref); + } + ack_nack_bool = FALSE; + + } else if (generation == NULL) { + crm_err("Rejecting invalid join-%d request from node %s " + "missing CIB generation " CRM_XS " ref=%s", + join_id, join_from, ref); + ack_nack_bool = FALSE; + + } else if ((join_version == NULL) + || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { + crm_err("Rejecting join-%d request from node %s because feature set %s" + " is incompatible with ours (%s) " CRM_XS " ref=%s", + join_id, join_from, (join_version? join_version : "pre-3.1.0"), + CRM_FEATURE_SET, ref); + ack_nack_bool = FALSE; + + } else if (max_generation_xml == NULL) { + const char *validation = crm_element_value(generation, + XML_ATTR_VALIDATION); + + if (get_schema_version(validation) < 0) { + crm_err("Rejecting join-%d request from %s (with first CIB " + "generation) due to unknown schema version %s " + CRM_XS " ref=%s", + join_id, join_from, validation, ref); + ack_nack_bool = FALSE; + + } else { + crm_debug("Accepting join-%d request from %s (with first CIB " + "generation) " CRM_XS " ref=%s", + join_id, join_from, ref); + max_generation_xml = copy_xml(generation); + pcmk__str_update(&max_generation_from, join_from); + } + + } else if ((cmp < 0) + || ((cmp == 0) + && pcmk__str_eq(join_from, controld_globals.our_nodename, + pcmk__str_casei))) { + const char *validation = crm_element_value(generation, + XML_ATTR_VALIDATION); + + if (get_schema_version(validation) < 0) { + crm_err("Rejecting join-%d request from %s (with better CIB " + "generation than current best from %s) due to unknown " + "schema version %s " CRM_XS " ref=%s", + join_id, join_from, max_generation_from, validation, ref); + ack_nack_bool = FALSE; + + } else { + crm_debug("Accepting join-%d request from %s (with better CIB " + "generation than current best from %s) " CRM_XS " ref=%s", + join_id, join_from, max_generation_from, ref); + crm_log_xml_debug(max_generation_xml, "Old max generation"); + crm_log_xml_debug(generation, "New max generation"); + + free_xml(max_generation_xml); + max_generation_xml = copy_xml(join_ack->xml); + pcmk__str_update(&max_generation_from, join_from); + } + + } else { + crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", + join_id, join_from, ref); + } + + if (!ack_nack_bool) { + if (compare_version(join_version, "3.17.0") < 0) { + /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely + * after a nack message, don't send one + */ + crm_update_peer_join(__func__, join_node, crm_join_nack_quiet); + } else { + crm_update_peer_join(__func__, join_node, crm_join_nack); + } + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); + + } else { + crm_update_peer_join(__func__, join_node, crm_join_integrated); + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); + } + + count = crmd_join_phase_count(crm_join_integrated); + crm_debug("%d node%s currently integrated in join-%d", + count, pcmk__plural_s(count), join_id); + + if (check_join_state(cur_state, __func__) == FALSE) { + // Don't waste time by invoking the scheduler yet + count = crmd_join_phase_count(crm_join_welcomed); + crm_debug("Waiting on join-%d requests from %d outstanding node%s", + join_id, count, pcmk__plural_s(count)); + } +} + +/* A_DC_JOIN_FINALIZE */ +void +do_dc_join_finalize(long long action, + enum 
crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + char *sync_from = NULL; + int rc = pcmk_ok; + int count_welcomed = crmd_join_phase_count(crm_join_welcomed); + int count_finalizable = crmd_join_phase_count(crm_join_integrated) + + crmd_join_phase_count(crm_join_nack) + + crmd_join_phase_count(crm_join_nack_quiet); + + /* This we can do straight away and avoid clients timing us out + * while we compute the latest CIB + */ + if (count_welcomed != 0) { + crm_debug("Waiting on join-%d requests from %d outstanding node%s " + "before finalizing join", current_join_id, count_welcomed, + pcmk__plural_s(count_welcomed)); + crmd_join_phase_log(LOG_DEBUG); + /* crmd_fsa_stall(FALSE); Needed? */ + return; + + } else if (count_finalizable == 0) { + crm_debug("Finalization not needed for join-%d at the current time", + current_join_id); + crmd_join_phase_log(LOG_DEBUG); + check_join_state(controld_globals.fsa_state, __func__); + return; + } + + controld_clear_fsa_input_flags(R_HAVE_CIB); + if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename, + pcmk__str_null_matches|pcmk__str_casei)) { + controld_set_fsa_input_flags(R_HAVE_CIB); + } + + if (!controld_globals.transition_graph->complete) { + crm_warn("Delaying join-%d finalization while transition in progress", + current_join_id); + crmd_join_phase_log(LOG_DEBUG); + crmd_fsa_stall(FALSE); + return; + } + + if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + // Send our CIB out to everyone + pcmk__str_update(&sync_from, controld_globals.our_nodename); + crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", + current_join_id, count_finalizable, + pcmk__plural_s(count_finalizable)); + crm_log_xml_debug(max_generation_xml, "Requested CIB version"); + + } else { + // Ask for the agreed best CIB + pcmk__str_update(&sync_from, max_generation_from); + crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", + current_join_id, count_finalizable, + pcmk__plural_s(count_finalizable), sync_from); + crm_log_xml_notice(max_generation_xml, "Requested CIB version"); + } + crmd_join_phase_log(LOG_DEBUG); + + rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, + sync_from, NULL, cib_none); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + controld_record_cib_replace_call(rc); + } + fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); +} + +void +free_max_generation(void) +{ + free(max_generation_from); + max_generation_from = NULL; + + free_xml(max_generation_xml); + max_generation_xml = NULL; +} + +void +finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + CRM_LOG_ASSERT(-EPERM != rc); + + controld_forget_cib_replace_call(call_id); + + if (rc != pcmk_ok) { + const char *sync_from = (const char *) user_data; + + do_crm_log(((rc == -pcmk_err_old_data)? 
LOG_WARNING : LOG_ERR), + "Could not sync CIB from %s in join-%d: %s", + sync_from, current_join_id, pcmk_strerror(rc)); + + if (rc != -pcmk_err_old_data) { + record_failed_sync_node(sync_from, current_join_id); + } + + /* restart the whole join process */ + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, + __func__); + + } else if (!AM_I_DC) { + crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); + + } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { + crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " + "(%s)", current_join_id, + fsa_state2string(controld_globals.fsa_state)); + + } else { + controld_set_fsa_input_flags(R_HAVE_CIB); + + /* make sure dc_uuid is re-set to us */ + if (!check_join_state(controld_globals.fsa_state, __func__)) { + int count_finalizable = 0; + + count_finalizable = crmd_join_phase_count(crm_join_integrated) + + crmd_join_phase_count(crm_join_nack) + + crmd_join_phase_count(crm_join_nack_quiet); + + crm_debug("Notifying %d node%s of join-%d results", + count_finalizable, pcmk__plural_s(count_finalizable), + current_join_id); + g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); + } + } +} + +static void +join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_debug("join-%d node history update (via CIB call %d) complete", + current_join_id, call_id); + check_join_state(controld_globals.fsa_state, __func__); + + } else { + crm_err("join-%d node history update (via CIB call %d) failed: %s " + "(next transition may determine resource status incorrectly)", + current_join_id, call_id, pcmk_strerror(rc)); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +/* A_DC_JOIN_PROCESS_ACK */ +void +do_dc_join_ack(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + int join_id = -1; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + enum controld_section_e section = controld_section_lrm; + const int cib_opts = cib_scope_local|cib_can_create; + + const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + crm_node_t *peer = NULL; + + // Sanity checks + if (join_from == NULL) { + crm_warn("Ignoring message received without node identification"); + return; + } + if (op == NULL) { + crm_warn("Ignoring message received from %s without task", join_from); + return; + } + + if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { + crm_debug("Ignoring '%s' message from %s while waiting for '%s'", + op, join_from, CRM_OP_JOIN_CONFIRM); + return; + } + + if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { + crm_warn("Ignoring join confirmation from %s without valid join ID", + join_from); + return; + } + + peer = crm_get_peer(0, join_from); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", + join_id, join_from, crm_join_phase_str(peer->join), + crm_join_phase_str(crm_join_finalized)); + return; + } + + if (join_id != current_join_id) { + crm_err("Rejecting join-%d confirmation from %s " + "because currently on join-%d", + join_id, join_from, current_join_id); + crm_update_peer_join(__func__, peer, crm_join_nack); + return; + } + + crm_update_peer_join(__func__, peer, 
crm_join_confirmed); + + /* Update CIB with node's current executor state. A new transition will be + * triggered later, when the CIB notifies us of the change. + */ + if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + section = controld_section_lrm_unlocked; + } + controld_delete_node_state(join_from, section, cib_scope_local); + if (pcmk__str_eq(join_from, controld_globals.our_nodename, + pcmk__str_casei)) { + xmlNode *now_dc_lrmd_state = controld_query_executor_state(); + + if (now_dc_lrmd_state != NULL) { + crm_debug("Updating local node history for join-%d " + "from query result", join_id); + controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts, + join_update_complete_callback); + free_xml(now_dc_lrmd_state); + } else { + crm_warn("Updating local node history from join-%d confirmation " + "because query failed", join_id); + controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, + join_update_complete_callback); + } + } else { + crm_debug("Updating node history for %s from join-%d confirmation", + join_from, join_id); + controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, + join_update_complete_callback); + } +} + +void +finalize_join_for(gpointer key, gpointer value, gpointer user_data) +{ + xmlNode *acknak = NULL; + xmlNode *tmp1 = NULL; + crm_node_t *join_node = value; + const char *join_to = join_node->uname; + bool integrated = false; + + switch (join_node->join) { + case crm_join_integrated: + integrated = true; + break; + case crm_join_nack: + case crm_join_nack_quiet: + break; + default: + crm_trace("Not updating non-integrated and non-nacked node %s (%s) " + "for join-%d", join_to, + crm_join_phase_str(join_node->join), current_join_id); + return; + } + + /* Update the element with the node's name and UUID, in case they + * weren't known before + */ + crm_trace("Updating node name and UUID in CIB for %s", join_to); + tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); + set_uuid(tmp1, XML_ATTR_ID, join_node); + crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); + fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); + free_xml(tmp1); + + if (join_node->join == crm_join_nack_quiet) { + crm_trace("Not sending nack message to node %s with feature set older " + "than 3.17.0", join_to); + return; + } + + join_node = crm_get_peer(0, join_to); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet + * simply creates more churn + * + * Better to leave them waiting and let the join restart when + * the new membership event comes in + * + * All other NACKs (due to versions etc) should still be processed + */ + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); + return; + } + + // Acknowledge or nack node's join request + crm_debug("%sing join-%d request from %s", + integrated? "Acknowledg" : "Nack", current_join_id, join_to); + acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); + pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); + + if (integrated) { + // No change needed for a nacked node + crm_update_peer_join(__func__, join_node, crm_join_finalized); + pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); + + /* Iterate through the remote peer cache and add information on which + * node hosts each to the ACK message. This keeps new controllers in + * sync with what has already happened. 
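+ *
+ * A sketch of the section added to the ack, assuming the conventional
+ * string values of these constants (XML_CIB_TAG_NODES "nodes",
+ * XML_CIB_TAG_NODE "node", XML_ATTR_ID "id", XML_CIB_TAG_STATE
+ * "node_state", PCMK__XA_CONN_HOST "conn_host"); node names are made up:
+ *
+ *   <nodes>
+ *     <node id="remote1" node_state="member" conn_host="node2"/>
+ *   </nodes>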
+ */ + if (crm_remote_peer_cache_size() != 0) { + GHashTableIter iter; + crm_node_t *node = NULL; + xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES); + + g_hash_table_iter_init(&iter, crm_remote_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *remote = NULL; + + if (!node->conn_host) { + continue; + } + + remote = create_xml_node(remotes, XML_CIB_TAG_NODE); + pcmk__xe_set_props(remote, + XML_ATTR_ID, node->uname, + XML_CIB_TAG_STATE, node->state, + PCMK__XA_CONN_HOST, node->conn_host, + NULL); + } + } + } + send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE); + free_xml(acknak); + return; +} + +gboolean +check_join_state(enum crmd_fsa_state cur_state, const char *source) +{ + static unsigned long long highest_seq = 0; + + if (controld_globals.membership_id != crm_peer_seq) { + crm_debug("join-%d: Membership changed from %llu to %llu " + CRM_XS " highest=%llu state=%s for=%s", + current_join_id, controld_globals.membership_id, crm_peer_seq, + highest_seq, fsa_state2string(cur_state), source); + if(highest_seq < crm_peer_seq) { + /* Don't spam the FSA with duplicates */ + highest_seq = crm_peer_seq; + register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); + } + + } else if (cur_state == S_INTEGRATION) { + if (crmd_join_phase_count(crm_join_welcomed) == 0) { + int count = crmd_join_phase_count(crm_join_integrated); + + crm_debug("join-%d: Integration of %d peer%s complete " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); + return TRUE; + } + + } else if (cur_state == S_FINALIZE_JOIN) { + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + crm_debug("join-%d: Delaying finalization until we have CIB " + CRM_XS " state=%s for=%s", + current_join_id, fsa_state2string(cur_state), source); + return TRUE; + + } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { + int count = crmd_join_phase_count(crm_join_welcomed); + + crm_debug("join-%d: Still waiting on %d welcomed node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_integrated) != 0) { + int count = crmd_join_phase_count(crm_join_integrated); + + crm_debug("join-%d: Still waiting on %d integrated node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_finalized) != 0) { + int count = crmd_join_phase_count(crm_join_finalized); + + crm_debug("join-%d: Still waiting on %d finalized node%s " + CRM_XS " state=%s for=%s", + current_join_id, count, pcmk__plural_s(count), + fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else { + crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", + current_join_id, fsa_state2string(cur_state), source); + register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); + return TRUE; + } + } + + return FALSE; +} + +void +do_dc_join_final(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); + crm_update_quorum(crm_have_quorum, TRUE); +} + +int crmd_join_phase_count(enum crm_join_phase phase) +{ + int 
count = 0; + crm_node_t *peer; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + if(peer->join == phase) { + count++; + } + } + return count; +} + +void crmd_join_phase_log(int level) +{ + crm_node_t *peer; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, + crm_join_phase_str(peer->join)); + } +} diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h new file mode 100644 index 0000000..25f3db3 --- /dev/null +++ b/daemons/controld/controld_lrm.h @@ -0,0 +1,188 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ +#ifndef CONTROLD_LRM__H +# define CONTROLD_LRM__H + +#include + +extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level); +void lrm_clear_last_failure(const char *rsc_id, const char *node_name, + const char *operation, guint interval_ms); +void lrm_op_callback(lrmd_event_data_t * op); +lrmd_t *crmd_local_lrmd_conn(void); + +typedef struct resource_history_s { + char *id; + uint32_t last_callid; + lrmd_rsc_info_t rsc; + lrmd_event_data_t *last; + lrmd_event_data_t *failed; + GList *recurring_op_list; + + /* Resources must be stopped using the same + * parameters they were started with. This hashtable + * holds the parameters that should be used for the next stop + * command on this resource. */ + GHashTable *stop_params; +} rsc_history_t; + +void history_free(gpointer data); + +enum active_op_e { + active_op_remove = (1 << 0), + active_op_cancelled = (1 << 1), +}; + +// In-flight action (recurring or pending) +typedef struct active_op_s { + guint interval_ms; + int call_id; + uint32_t flags; // bitmask of active_op_e + time_t start_time; + time_t lock_time; + char *rsc_id; + char *op_type; + char *op_key; + char *user_data; + GHashTable *params; +} active_op_t; + +#define controld_set_active_op_flags(active_op, flags_to_set) do { \ + (active_op)->flags = pcmk__set_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Active operation", (active_op)->op_key, \ + (active_op)->flags, (flags_to_set), #flags_to_set); \ + } while (0) + +#define controld_clear_active_op_flags(active_op, flags_to_clear) do { \ + (active_op)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Active operation", (active_op)->op_key, \ + (active_op)->flags, (flags_to_clear), #flags_to_clear); \ + } while (0) + +typedef struct lrm_state_s { + const char *node_name; + void *conn; // Reserved for controld_execd_state.c usage + void *remote_ra_data; // Reserved for controld_remote_ra.c usage + + GHashTable *resource_history; + GHashTable *active_ops; // Pending and recurring actions + GHashTable *deletion_ops; + GHashTable *rsc_info_cache; + GHashTable *metadata_cache; // key = class[:provider]:agent, value = ra_metadata_s + + int num_lrm_register_fails; +} lrm_state_t; + +struct pending_deletion_op_s { + char *rsc; + ha_msg_input_t *input; +}; + +/*! + * \brief Check whether this is the local IPC connection to the executor + */ +gboolean +lrm_state_is_local(lrm_state_t *lrm_state); + +/*! + * \brief Clear all state information from a single state entry. 
+ * \note It is sometimes useful to preserve the metadata cache when it won't go stale. + * \note This does not close the executor connection + */ +void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata); +GList *lrm_state_get_list(void); + +/*! + * \brief Initialize internal state tables + */ +gboolean lrm_state_init_local(void); + +/*! + * \brief Destroy all state entries and internal state tables + */ +void lrm_state_destroy_all(void); + +/*! + * \brief Destroy executor connection by node name + */ +void lrm_state_destroy(const char *node_name); + +/*! + * \brief Find lrm_state data by node name + */ +lrm_state_t *lrm_state_find(const char *node_name); + +/*! + * \brief Either find or create a new entry + */ +lrm_state_t *lrm_state_find_or_create(const char *node_name); + +/*! + * The functions below are wrappers for the executor API that the controller + * uses. These wrapper functions allow us to treat the controller's remote + * executor connection resources the same as regular resources. Internally, + * regular resources go to the executor, and remote connection resources are + * handled locally in the controller. + */ +void lrm_state_disconnect_only(lrm_state_t * lrm_state); +void lrm_state_disconnect(lrm_state_t * lrm_state); +int controld_connect_local_executor(lrm_state_t *lrm_state); +int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, + int port, int timeout); +int lrm_state_is_connected(lrm_state_t * lrm_state); +int lrm_state_poke_connection(lrm_state_t * lrm_state); + +int lrm_state_get_metadata(lrm_state_t * lrm_state, + const char *class, + const char *provider, + const char *agent, char **output, enum lrmd_call_options options); +int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms); +int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, + GHashTable *parameters, int *call_id); +lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state, + const char *rsc_id, enum lrmd_call_options options); +int lrm_state_register_rsc(lrm_state_t * lrm_state, + const char *rsc_id, + const char *class, + const char *provider, const char *agent, enum lrmd_call_options options); +int lrm_state_unregister_rsc(lrm_state_t * lrm_state, + const char *rsc_id, enum lrmd_call_options options); + +// Functions used to manage remote executor connection resources +void remote_lrm_op_callback(lrmd_event_data_t * op); +gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id); +lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id); +int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms); +int controld_execute_remote_agent(const lrm_state_t *lrm_state, + const char *rsc_id, const char *action, + const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, lrmd_key_value_t *params, + int *call_id); +void remote_ra_cleanup(lrm_state_t * lrm_state); +void remote_ra_fail(const char *node_name); +void remote_ra_process_pseudo(xmlNode *xml); +gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state); +void remote_ra_process_maintenance_nodes(xmlNode *xml); +gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); + +void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + active_op_t *pending, const xmlNode *action_xml); +void 
controld_ack_event_directly(const char *to_host, const char *to_sys, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, const char *rsc_id); +void controld_rc2event(lrmd_event_data_t *event, int rc); +void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id); + +#endif diff --git a/daemons/controld/controld_matrix.c b/daemons/controld/controld_matrix.c new file mode 100644 index 0000000..a404f0a --- /dev/null +++ b/daemons/controld/controld_matrix.c @@ -0,0 +1,1250 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include // uint64_t + +#include + +/* + * The state transition table. The rows are inputs, and + * the columns are states. + */ +static const enum crmd_fsa_state fsa_next_states[MAXINPUT][MAXSTATE] = { +/* Got an I_NULL */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_CIB_OP */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_CIB_UPDATE */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_DC_TIMEOUT */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_ELECTION, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RECOVERY, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_ELECTION, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_ELECTION, + }, + +/* Got an I_ELECTION */ + { + /* S_IDLE ==> */ S_ELECTION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_ELECTION, + /* S_FINALIZE_JOIN ==> */ S_ELECTION, + /* S_NOT_DC ==> */ S_ELECTION, + /* S_POLICY_ENGINE ==> */ S_ELECTION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ 
S_STARTING, + /* S_PENDING ==> */ S_ELECTION, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_ELECTION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_PE_CALC */ + { + /* S_IDLE ==> */ S_POLICY_ENGINE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_DC */ + { + /* S_IDLE ==> */ S_RELEASE_DC, + /* S_ELECTION ==> */ S_RELEASE_DC, + /* S_INTEGRATION ==> */ S_RELEASE_DC, + /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RELEASE_DC, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ELECTION_DC */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_INTEGRATION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_INTEGRATION, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ERROR */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RECOVERY, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RECOVERY, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_TERMINATE, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_RECOVERY, + }, + +/* Got an I_FAIL */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RELEASE_DC, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_STOPPING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_RELEASE_DC, + }, + +/* Got an I_INTEGRATED */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_FINALIZE_JOIN, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_FINALIZED */ + { + 
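/* Editor's note: each inner block maps the current state (named in each
+         * row comment) to the next state for this input; e.g., I_FINALIZED
+         * arriving in S_FINALIZE_JOIN advances the FSA to S_POLICY_ENGINE.
+         */
+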
/* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_RECOVERY, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_NODE_JOIN */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_NOT_DC */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_NOT_DC, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RECOVERED */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_PENDING, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_FAIL */ + { + /* S_IDLE ==> */ S_STOPPING, + /* S_ELECTION ==> */ S_STOPPING, + /* S_INTEGRATION ==> */ S_STOPPING, + /* S_FINALIZE_JOIN ==> */ S_STOPPING, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_STOPPING, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_STOPPING, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_STOPPING, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RELEASE_SUCCESS */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_PENDING, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_RESTART */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ 
S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_TE_SUCCESS */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_IDLE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_ROUTER */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_SHUTDOWN */ + { + /* S_IDLE ==> */ S_POLICY_ENGINE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, + /* S_HALT ==> */ S_ELECTION, + }, + +/* Got an I_STOP */ + { + /* S_IDLE ==> */ S_STOPPING, + /* S_ELECTION ==> */ S_STOPPING, + /* S_INTEGRATION ==> */ S_STOPPING, + /* S_FINALIZE_JOIN ==> */ S_STOPPING, + /* S_NOT_DC ==> */ S_STOPPING, + /* S_POLICY_ENGINE ==> */ S_STOPPING, + /* S_RECOVERY ==> */ S_STOPPING, + /* S_RELEASE_DC ==> */ S_STOPPING, + /* S_STARTING ==> */ S_STOPPING, + /* S_PENDING ==> */ S_STOPPING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_STOPPING, + /* S_HALT ==> */ S_STOPPING, + }, + +/* Got an I_TERMINATE */ + { + /* S_IDLE ==> */ S_TERMINATE, + /* S_ELECTION ==> */ S_TERMINATE, + /* S_INTEGRATION ==> */ S_TERMINATE, + /* S_FINALIZE_JOIN ==> */ S_TERMINATE, + /* S_NOT_DC ==> */ S_TERMINATE, + /* S_POLICY_ENGINE ==> */ S_TERMINATE, + /* S_RECOVERY ==> */ S_TERMINATE, + /* S_RELEASE_DC ==> */ S_TERMINATE, + /* S_STARTING ==> */ S_TERMINATE, + /* S_PENDING ==> */ S_TERMINATE, + /* S_STOPPING ==> */ S_TERMINATE, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TERMINATE, + /* S_HALT ==> */ S_TERMINATE, + }, + +/* Got an I_STARTUP */ + { + /* S_IDLE ==> */ S_RECOVERY, + /* S_ELECTION ==> */ S_RECOVERY, + /* S_INTEGRATION ==> */ S_RECOVERY, + /* S_FINALIZE_JOIN ==> */ S_RECOVERY, + /* S_NOT_DC ==> */ S_RECOVERY, + /* S_POLICY_ENGINE ==> */ S_RECOVERY, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_PE_SUCCESS */ 
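+/* Editor's note: a successful scheduler run while in S_POLICY_ENGINE moves
+ * the controller on to S_TRANSITION_ENGINE below, where the resulting
+ * transition graph is executed. */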
+ { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_OFFER */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_REQUEST */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_JOIN_RESULT */ + { + /* S_IDLE ==> */ S_INTEGRATION, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_INTEGRATION, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_RECOVERY, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_WAIT_FOR_EVENT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_DC_HEARTBEAT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_LRM_EVENT */ + { + /* S_IDLE ==> */ S_IDLE, + /* S_ELECTION ==> */ S_ELECTION, + /* S_INTEGRATION ==> */ S_INTEGRATION, + /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, + /* S_NOT_DC ==> */ S_NOT_DC, + /* S_POLICY_ENGINE ==> 
*/ S_POLICY_ENGINE, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_PENDING */ + { + /* S_IDLE ==> */ S_PENDING, + /* S_ELECTION ==> */ S_PENDING, + /* S_INTEGRATION ==> */ S_PENDING, + /* S_FINALIZE_JOIN ==> */ S_PENDING, + /* S_NOT_DC ==> */ S_PENDING, + /* S_POLICY_ENGINE ==> */ S_PENDING, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_PENDING, + /* S_PENDING ==> */ S_PENDING, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_PENDING, + /* S_HALT ==> */ S_HALT, + }, + +/* Got an I_HALT */ + { + /* S_IDLE ==> */ S_HALT, + /* S_ELECTION ==> */ S_HALT, + /* S_INTEGRATION ==> */ S_HALT, + /* S_FINALIZE_JOIN ==> */ S_HALT, + /* S_NOT_DC ==> */ S_HALT, + /* S_POLICY_ENGINE ==> */ S_HALT, + /* S_RECOVERY ==> */ S_RECOVERY, + /* S_RELEASE_DC ==> */ S_RELEASE_DC, + /* S_STARTING ==> */ S_STARTING, + /* S_PENDING ==> */ S_HALT, + /* S_STOPPING ==> */ S_STOPPING, + /* S_TERMINATE ==> */ S_TERMINATE, + /* S_TRANSITION_ENGINE ==> */ S_HALT, + /* S_HALT ==> */ S_HALT, + }, +}; + +/* + * The action table. Each entry is a set of actions to take or-ed + * together. Like the state table, the rows are inputs, and + * the columns are states. + */ + +/* NOTE: In the fsa, the actions are extracted then state is updated. */ + +static const uint64_t fsa_actions[MAXINPUT][MAXSTATE] = { + +/* Got an I_NULL */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_NOTHING, + /* S_INTEGRATION ==> */ A_NOTHING, + /* S_FINALIZE_JOIN ==> */ A_NOTHING, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_NOTHING, + /* S_STARTING ==> */ A_NOTHING, + /* S_PENDING ==> */ A_NOTHING, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_NOTHING, + }, + +/* Got an I_CIB_OP */ + { + /* S_IDLE ==> */ A_ERROR, + /* S_ELECTION ==> */ A_ERROR, + /* S_INTEGRATION ==> */ A_ERROR, + /* S_FINALIZE_JOIN ==> */ A_ERROR, + /* S_NOT_DC ==> */ A_ERROR, + /* S_POLICY_ENGINE ==> */ A_ERROR, + /* S_RECOVERY ==> */ A_ERROR, + /* S_RELEASE_DC ==> */ A_ERROR, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_ERROR, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_ERROR, + /* S_HALT ==> */ A_ERROR, + }, + +/* Got an I_CIB_UPDATE */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_LOG, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_DC_TIMEOUT */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_ELECTION_VOTE | A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_ELECTION_VOTE | 
A_WARN, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL | A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ELECTION */ + { + /* S_IDLE ==> */ A_ELECTION_VOTE, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_ELECTION_VOTE, + /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE, + /* S_NOT_DC ==> */ A_ELECTION_VOTE, + /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_ELECTION_VOTE, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE, + /* S_HALT ==> */ A_ELECTION_VOTE, + }, + +/* Got an I_PE_CALC */ + { + /* S_IDLE ==> */ A_PE_INVOKE, + /* S_ELECTION ==> */ A_NOTHING, + /* S_INTEGRATION ==> */ A_NOTHING, + /* S_FINALIZE_JOIN ==> */ A_NOTHING, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_PE_INVOKE, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_NOTHING, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE, + /* S_HALT ==> */ A_ERROR, + }, + +/* Got an I_RELEASE_DC */ + { + /* S_IDLE ==> */ O_RELEASE, + /* S_ELECTION ==> */ O_RELEASE, + /* S_INTEGRATION ==> */ O_RELEASE | A_WARN, + /* S_FINALIZE_JOIN ==> */ O_RELEASE | A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ O_RELEASE | A_WARN, + /* S_RECOVERY ==> */ O_RELEASE, + /* S_RELEASE_DC ==> */ O_RELEASE | A_WARN, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ O_RELEASE | A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ELECTION_DC */ + { + /* S_IDLE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_ELECTION ==> */ + A_LOG | A_DC_TAKEOVER | A_PE_START | A_TE_START | A_DC_JOIN_OFFER_ALL | A_DC_TIMER_STOP, + /* S_INTEGRATION ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, + /* S_FINALIZE_JOIN ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, + /* S_NOT_DC ==> */ A_LOG | A_ELECTION_VOTE, + /* S_POLICY_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN | A_ELECTION_VOTE, + /* S_STARTING ==> */ A_LOG | A_WARN, + /* S_PENDING ==> */ A_LOG | A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ERROR */ + { + /* S_IDLE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_ELECTION ==> */ A_ERROR | A_RECOVER | O_RELEASE, + /* S_INTEGRATION ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_FINALIZE_JOIN ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_NOT_DC ==> */ A_ERROR | A_RECOVER, + /* S_POLICY_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, + /* S_RELEASE_DC ==> */ A_ERROR | A_RECOVER, + /* S_STARTING ==> */ A_ERROR | A_RECOVER, + /* S_PENDING ==> */ A_ERROR | A_RECOVER, + /* S_STOPPING ==> */ A_ERROR | A_EXIT_1, + /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + /* S_HALT ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, + }, + +/* Got an I_FAIL */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, + 
/* S_FINALIZE_JOIN ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN | A_DC_JOIN_OFFER_ALL | A_TE_CANCEL, + /* S_RECOVERY ==> */ A_WARN | O_RELEASE, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_WARN | O_LRM_RECONNECT | A_PE_INVOKE | A_TE_CANCEL, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_INTEGRATED */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_FINALIZED */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL | A_TE_CANCEL, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_NOTHING, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_NODE_JOIN */ + { + /* S_IDLE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_NOT_DC */ + { + /* S_IDLE ==> */ A_WARN | O_RELEASE, + /* S_ELECTION ==> */ A_ERROR | A_ELECTION_START | A_DC_TIMER_STOP, + /* S_INTEGRATION ==> */ A_ERROR | O_RELEASE, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE, + /* S_NOT_DC ==> */ A_LOG, + /* S_POLICY_ENGINE ==> */ A_ERROR | O_RELEASE, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, + /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_LOG | A_DC_TIMER_STOP, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_RELEASE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RECOVERED */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RELEASE_FAIL */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_NOTHING, + /* S_RECOVERY ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_RELEASE_DC ==> */ A_NOTHING, + /* S_STARTING ==> */ A_WARN, + /* 
S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RELEASE_SUCCESS */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_RESTART */ + { + /* S_IDLE ==> */ A_NOTHING, + /* S_ELECTION ==> */ A_LOG | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_LOG | A_DC_JOIN_OFFER_ALL, + /* S_FINALIZE_JOIN ==> */ A_LOG | A_DC_JOIN_FINALIZE, + /* S_NOT_DC ==> */ A_LOG | A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_LOG | A_PE_INVOKE, + /* S_RECOVERY ==> */ A_LOG | A_RECOVER | O_RELEASE, + /* S_RELEASE_DC ==> */ A_LOG | O_RELEASE, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_LOG | A_TE_INVOKE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_TE_SUCCESS */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_ERROR, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_RECOVER | A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_ERROR, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_ROUTER */ + { + /* S_IDLE ==> */ A_MSG_ROUTE, + /* S_ELECTION ==> */ A_MSG_ROUTE, + /* S_INTEGRATION ==> */ A_MSG_ROUTE, + /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE, + /* S_NOT_DC ==> */ A_MSG_ROUTE, + /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE, + /* S_RECOVERY ==> */ A_MSG_ROUTE, + /* S_RELEASE_DC ==> */ A_MSG_ROUTE, + /* S_STARTING ==> */ A_MSG_ROUTE, + /* S_PENDING ==> */ A_MSG_ROUTE, + /* S_STOPPING ==> */ A_MSG_ROUTE, + /* S_TERMINATE ==> */ A_MSG_ROUTE, + /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE, + /* S_HALT ==> */ A_WARN | A_MSG_ROUTE, + }, + +/* Got an I_SHUTDOWN */ + { + /* S_IDLE ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_ELECTION ==> */ A_LOG | A_SHUTDOWN_REQ | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_FINALIZE_JOIN ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_NOT_DC ==> */ A_SHUTDOWN_REQ, + /* S_POLICY_ENGINE ==> */ A_LOG | A_SHUTDOWN_REQ, + /* S_RECOVERY ==> */ A_WARN | O_EXIT | O_RELEASE, + /* S_RELEASE_DC ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_STARTING ==> */ A_WARN | O_EXIT, + /* S_PENDING ==> */ A_SHUTDOWN_REQ, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_SHUTDOWN_REQ, + /* S_HALT ==> */ A_WARN | A_ELECTION_START | A_SHUTDOWN_REQ, + }, + +/* Got an I_STOP */ + { + /* S_IDLE ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_ELECTION ==> */ O_RELEASE | O_EXIT, + /* S_INTEGRATION ==> */ A_WARN | O_RELEASE | O_EXIT, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_NOT_DC ==> */ O_EXIT, + /* S_POLICY_ENGINE ==> */ A_WARN | O_RELEASE | O_EXIT, + /* S_RECOVERY ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE | O_EXIT, + /* S_STARTING ==> */ O_EXIT, + /* S_PENDING ==> */ O_EXIT, + /* S_STOPPING ==> */ O_EXIT, + /* S_TERMINATE ==> 
*/ A_ERROR | A_EXIT_1, + /* S_TRANSITION_ENGINE ==> */ A_LOG | O_RELEASE | O_EXIT, + /* S_HALT ==> */ O_RELEASE | O_EXIT | A_WARN, + }, + +/* Got an I_TERMINATE */ + { + /* S_IDLE ==> */ A_ERROR | O_EXIT, + /* S_ELECTION ==> */ A_ERROR | O_EXIT, + /* S_INTEGRATION ==> */ A_ERROR | O_EXIT, + /* S_FINALIZE_JOIN ==> */ A_ERROR | O_EXIT, + /* S_NOT_DC ==> */ A_ERROR | O_EXIT, + /* S_POLICY_ENGINE ==> */ A_ERROR | O_EXIT, + /* S_RECOVERY ==> */ A_ERROR | O_EXIT, + /* S_RELEASE_DC ==> */ A_ERROR | O_EXIT, + /* S_STARTING ==> */ O_EXIT, + /* S_PENDING ==> */ A_ERROR | O_EXIT, + /* S_STOPPING ==> */ O_EXIT, + /* S_TERMINATE ==> */ O_EXIT, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_EXIT, + /* S_HALT ==> */ A_ERROR | O_EXIT, + }, + +/* Got an I_STARTUP */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ + A_LOG | A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_PE_SUCCESS */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_TE_INVOKE, + /* S_RECOVERY ==> */ A_RECOVER | A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_ERROR, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_OFFER */ + { + /* S_IDLE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST, + /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST, + /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_POLICY_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_RECOVERY ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_RELEASE_DC ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_REQUEST */ + { + /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ, + /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_JOIN_RESULT */ + { + /* S_IDLE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_LOG | A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, + /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, + /* S_NOT_DC ==> */ A_ERROR | A_CL_JOIN_ANNOUNCE, + /* S_POLICY_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ 
A_ERROR, + /* S_PENDING ==> */ A_CL_JOIN_RESULT, + /* S_STOPPING ==> */ A_ERROR, + /* S_TERMINATE ==> */ A_ERROR, + /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_WAIT_FOR_EVENT */ + { + /* S_IDLE ==> */ A_LOG, + /* S_ELECTION ==> */ A_LOG, + /* S_INTEGRATION ==> */ A_LOG, + /* S_FINALIZE_JOIN ==> */ A_LOG, + /* S_NOT_DC ==> */ A_LOG, + /* S_POLICY_ENGINE ==> */ A_LOG, + /* S_RECOVERY ==> */ A_LOG, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG, + /* S_STOPPING ==> */ A_LOG, + /* S_TERMINATE ==> */ A_LOG, + /* S_TRANSITION_ENGINE ==> */ A_LOG, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_DC_HEARTBEAT */ + { + /* S_IDLE ==> */ A_ERROR, + /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, + /* S_INTEGRATION ==> */ A_ERROR, + /* S_FINALIZE_JOIN ==> */ A_ERROR, + /* S_NOT_DC ==> */ A_NOTHING, + /* S_POLICY_ENGINE ==> */ A_ERROR, + /* S_RECOVERY ==> */ A_NOTHING, + /* S_RELEASE_DC ==> */ A_LOG, + /* S_STARTING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG | A_CL_JOIN_ANNOUNCE, + /* S_STOPPING ==> */ A_NOTHING, + /* S_TERMINATE ==> */ A_NOTHING, + /* S_TRANSITION_ENGINE ==> */ A_ERROR, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_LRM_EVENT */ + { + /* S_IDLE ==> */ A_LRM_EVENT, + /* S_ELECTION ==> */ A_LRM_EVENT, + /* S_INTEGRATION ==> */ A_LRM_EVENT, + /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT, + /* S_NOT_DC ==> */ A_LRM_EVENT, + /* S_POLICY_ENGINE ==> */ A_LRM_EVENT, + /* S_RECOVERY ==> */ A_LRM_EVENT, + /* S_RELEASE_DC ==> */ A_LRM_EVENT, + /* S_STARTING ==> */ A_LRM_EVENT, + /* S_PENDING ==> */ A_LRM_EVENT, + /* S_STOPPING ==> */ A_LRM_EVENT, + /* S_TERMINATE ==> */ A_LRM_EVENT, + /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT, + /* S_HALT ==> */ A_WARN, + }, + +/* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */ +/* Got an I_PENDING */ + { + /* S_IDLE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_ELECTION ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_INTEGRATION ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_FINALIZE_JOIN ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_NOT_DC ==> */ A_LOG | O_DC_TIMER_RESTART, + /* S_POLICY_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN | O_DC_TIMER_RESTART, + /* S_STARTING ==> */ A_LOG | A_DC_TIMER_START | A_CL_JOIN_QUERY, + /* S_PENDING ==> */ A_LOG | O_DC_TIMER_RESTART, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, + /* S_HALT ==> */ A_WARN, + }, + +/* Got an I_HALT */ + { + /* S_IDLE ==> */ A_WARN, + /* S_ELECTION ==> */ A_WARN, + /* S_INTEGRATION ==> */ A_WARN, + /* S_FINALIZE_JOIN ==> */ A_WARN, + /* S_NOT_DC ==> */ A_WARN, + /* S_POLICY_ENGINE ==> */ A_WARN, + /* S_RECOVERY ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN, + /* S_STARTING ==> */ A_WARN, + /* S_PENDING ==> */ A_WARN, + /* S_STOPPING ==> */ A_WARN, + /* S_TERMINATE ==> */ A_WARN, + /* S_TRANSITION_ENGINE ==> */ A_WARN, + /* S_HALT ==> */ A_WARN, + }, +}; + +/*! + * \internal + * \brief Get the next FSA state given an input and the current state + * + * \param[in] input FSA input + * + * \return The next FSA state + */ +enum crmd_fsa_state +controld_fsa_get_next_state(enum crmd_fsa_input input) +{ + return fsa_next_states[input][controld_globals.fsa_state]; +} + +/*! 
+ * \internal + * \brief Get the appropriate FSA action given an input and the current state + * + * \param[in] input FSA input + * + * \return The appropriate FSA action + */ +uint64_t +controld_fsa_get_action(enum crmd_fsa_input input) +{ + return fsa_actions[input][controld_globals.fsa_state]; +} diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c new file mode 100644 index 0000000..1f7e4c0 --- /dev/null +++ b/daemons/controld/controld_membership.c @@ -0,0 +1,457 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +/* put these first so that uuid_t is defined without conflicts */ +#include + +#include + +#include +#include +#include +#include +#include + +#include + +void post_cache_update(int instance); + +extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + +static void +reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) +{ + crm_node_t *node = value; + + if (crm_is_peer_active(node) == FALSE) { + crm_update_peer_join(__func__, node, crm_join_none); + + if(node && node->uname) { + if (pcmk__str_eq(controld_globals.our_nodename, node->uname, + pcmk__str_casei)) { + crm_err("We're not part of the cluster anymore"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + + } else if (!AM_I_DC + && pcmk__str_eq(node->uname, controld_globals.dc_name, + pcmk__str_casei)) { + crm_warn("Our DC node (%s) left the cluster", node->uname); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + } + } + + if ((controld_globals.fsa_state == S_INTEGRATION) + || (controld_globals.fsa_state == S_FINALIZE_JOIN)) { + check_join_state(controld_globals.fsa_state, __func__); + } + if ((node != NULL) && (node->uuid != NULL)) { + fail_incompletable_actions(controld_globals.transition_graph, + node->uuid); + } + } +} + +void +post_cache_update(int instance) +{ + xmlNode *no_op = NULL; + + crm_peer_seq = instance; + crm_debug("Updated cache after membership event %d.", instance); + + g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); + controld_set_fsa_input_flags(R_MEMBERSHIP); + + if (AM_I_DC) { + populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | + node_update_expected, __func__); + } + + /* + * If we lost nodes, we should re-check the election status + * Safe to call outside of an election + */ + controld_set_fsa_action_flags(A_ELECTION_CHECK); + controld_trigger_fsa(); + + /* Membership changed, remind everyone we're here. + * This will aid detection of duplicate DCs + */ + no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, + AM_I_DC ? 
CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); + send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); + free_xml(no_op); +} + +static void +crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_trace("Node update %d complete", call_id); + + } else if(call_id < pcmk_ok) { + crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + + } else { + crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +/*! + * \internal + * \brief Create an XML node state tag with updates + * + * \param[in,out] node Node whose state will be used for update + * \param[in] flags Bitmask of node_update_flags indicating what to update + * \param[in,out] parent XML node to contain update (or NULL) + * \param[in] source Who requested the update (only used for logging) + * + * \return Pointer to created node state tag + */ +xmlNode * +create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, + const char *source) +{ + const char *value = NULL; + xmlNode *node_state; + + if (!node->state) { + crm_info("Node update for %s cancelled: no state, not seen yet", node->uname); + return NULL; + } + + node_state = create_xml_node(parent, XML_CIB_TAG_STATE); + + if (pcmk_is_set(node->flags, crm_remote_node)) { + pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true); + } + + set_uuid(node_state, XML_ATTR_ID, node); + + if (crm_element_value(node_state, XML_ATTR_ID) == NULL) { + crm_info("Node update for %s cancelled: no id", node->uname); + free_xml(node_state); + return NULL; + } + + crm_xml_add(node_state, XML_ATTR_UNAME, node->uname); + + if ((flags & node_update_cluster) && node->state) { + pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER, + pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); + } + + if (!pcmk_is_set(node->flags, crm_remote_node)) { + if (flags & node_update_peer) { + value = OFFLINESTATUS; + if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { + value = ONLINESTATUS; + } + crm_xml_add(node_state, XML_NODE_IS_PEER, value); + } + + if (flags & node_update_join) { + if (node->join <= crm_join_none) { + value = CRMD_JOINSTATE_DOWN; + } else { + value = CRMD_JOINSTATE_MEMBER; + } + crm_xml_add(node_state, XML_NODE_JOIN_STATE, value); + } + + if (flags & node_update_expected) { + crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected); + } + } + + crm_xml_add(node_state, XML_ATTR_ORIGIN, source); + + return node_state; +} + +static void +remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + xmlNode * output, void *user_data) +{ + char *node_uuid = user_data; + + do_crm_log_unlikely(rc == 0 ? 
LOG_DEBUG : LOG_NOTICE, + "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", + node_uuid, pcmk_strerror(rc), rc); +} + +static void +search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, + xmlNode * output, void *user_data) +{ + char *new_node_uuid = user_data; + xmlNode *node_xml = NULL; + + if (rc != pcmk_ok) { + if (rc != -ENXIO) { + crm_notice("Searching conflicting nodes for %s failed: %s (%d)", + new_node_uuid, pcmk_strerror(rc), rc); + } + return; + + } else if (output == NULL) { + return; + } + + if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) { + node_xml = output; + + } else { + node_xml = pcmk__xml_first_child(output); + } + + for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) { + const char *node_uuid = NULL; + const char *node_uname = NULL; + GHashTableIter iter; + crm_node_t *node = NULL; + gboolean known = FALSE; + + if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) { + continue; + } + + node_uuid = crm_element_value(node_xml, XML_ATTR_ID); + node_uname = crm_element_value(node_xml, XML_ATTR_UNAME); + + if (node_uuid == NULL || node_uname == NULL) { + continue; + } + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if (node->uuid + && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei) + && node->uname + && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) { + + known = TRUE; + break; + } + } + + if (known == FALSE) { + cib_t *cib_conn = controld_globals.cib_conn; + int delete_call_id = 0; + xmlNode *node_state_xml = NULL; + + crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s", + node_uuid, node_uname, new_node_uuid); + + delete_call_id = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_NODES, + node_xml, cib_scope_local); + fsa_register_cib_callback(delete_call_id, strdup(node_uuid), + remove_conflicting_node_callback); + + node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE); + crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid); + crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname); + + delete_call_id = cib_conn->cmds->remove(cib_conn, + XML_CIB_TAG_STATUS, + node_state_xml, + cib_scope_local); + fsa_register_cib_callback(delete_call_id, strdup(node_uuid), + remove_conflicting_node_callback); + free_xml(node_state_xml); + } + } +} + +static void +node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if(call_id < pcmk_ok) { + crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); + crm_log_xml_debug(msg, "update:failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + + } else if(rc < pcmk_ok) { + crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "update:failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +void +populate_cib_nodes(enum node_update_flags flags, const char *source) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + int call_id = 0; + gboolean from_hashtable = TRUE; + xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); + +#if SUPPORT_COROSYNC + if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) { + from_hashtable = pcmk__corosync_add_nodes(node_list); + } +#endif + + if (from_hashtable) { + GHashTableIter iter; + crm_node_t *node = NULL; + GString *xpath = NULL; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while 
(g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *new_node = NULL; + + if ((node->uuid != NULL) && (node->uname != NULL)) { + crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); + if (xpath == NULL) { + xpath = g_string_sized_new(512); + } else { + g_string_truncate(xpath, 0); + } + + /* We need both to be valid */ + new_node = create_xml_node(node_list, XML_CIB_TAG_NODE); + crm_xml_add(new_node, XML_ATTR_ID, node->uuid); + crm_xml_add(new_node, XML_ATTR_UNAME, node->uname); + + /* Search and remove unknown nodes with the conflicting uname from CIB */ + pcmk__g_strcat(xpath, + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION + "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node->uname, "']" + "[@" XML_ATTR_ID "!='", node->uuid, "']", NULL); + + call_id = cib_conn->cmds->query(cib_conn, + (const char *) xpath->str, + NULL, + cib_scope_local|cib_xpath); + fsa_register_cib_callback(call_id, strdup(node->uuid), + search_conflicting_node_callback); + } + } + + if (xpath != NULL) { + g_string_free(xpath, TRUE); + } + } + + crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); + + if ((controld_update_cib(XML_CIB_TAG_NODES, node_list, cib_scope_local, + node_list_update_callback) == pcmk_rc_ok) + && (crm_peer_cache != NULL) && AM_I_DC) { + /* + * There is no need to update the local CIB with our values if + * we've not seen valid membership data + */ + GHashTableIter iter; + crm_node_t *node = NULL; + + free_xml(node_list); + node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS); + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + create_node_state_update(node, flags, node_list, source); + } + + if (crm_remote_peer_cache) { + g_hash_table_iter_init(&iter, crm_remote_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + create_node_state_update(node, flags, node_list, source); + } + } + + controld_update_cib(XML_CIB_TAG_STATUS, node_list, cib_scope_local, + crmd_node_update_complete); + } + free_xml(node_list); +} + +static void +cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { + crm_trace("Quorum update %d complete", call_id); + + } else { + crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +} + +void +crm_update_quorum(gboolean quorum, gboolean force_update) +{ + bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum); + + if (quorum) { + controld_set_global_flags(controld_ever_had_quorum); + + } else if (pcmk_all_flags_set(controld_globals.flags, + controld_ever_had_quorum + |controld_no_quorum_suicide)) { + pcmk__panic(__func__); + } + + if (AM_I_DC + && ((has_quorum && !quorum) || (!has_quorum && quorum) + || force_update)) { + xmlNode *update = NULL; + + update = create_xml_node(NULL, XML_TAG_CIB); + crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); + crm_xml_add(update, XML_ATTR_DC_UUID, controld_globals.our_uuid); + + crm_debug("Updating quorum status to %s", pcmk__btoa(quorum)); + controld_update_cib(XML_TAG_CIB, update, cib_scope_local, + cib_quorum_update_complete); + free_xml(update); + + /* Quorum changes usually cause a new transition via other activity: + * quorum gained via a node joining will abort via the node join, + * and quorum lost via a node 
leaving will usually abort via resource
+         * activity and/or fencing.
+         *
+         * However, it is possible that nothing else causes a transition (e.g.
+         * someone forces quorum via corosync-cmapctl, or quorum is lost due to
+         * a node in standby shutting down cleanly), so here ensure a new
+         * transition is triggered.
+         */
+        if (quorum) {
+            /* If quorum was gained, abort after a short delay, in case multiple
+             * nodes are joining around the same time, so the one that brings us
+             * to quorum doesn't cause all the remaining ones to be fenced.
+             */
+            abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
+                              5000);
+        } else {
+            abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
+                             NULL);
+        }
+    }
+
+    if (quorum) {
+        controld_set_global_flags(controld_has_quorum);
+    } else {
+        controld_clear_global_flags(controld_has_quorum);
+    }
+}
diff --git a/daemons/controld/controld_membership.h b/daemons/controld/controld_membership.h
new file mode 100644
index 0000000..cfe8cee
--- /dev/null
+++ b/daemons/controld/controld_membership.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2012-2021 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+#ifndef MEMBERSHIP__H
+#  define MEMBERSHIP__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void post_cache_update(int instance);
+
+extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
+
+void controld_destroy_failed_sync_table(void);
+void controld_remove_failed_sync_node(const char *node_name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
new file mode 100644
index 0000000..54b27ec
--- /dev/null
+++ b/daemons/controld/controld_messages.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+extern void crm_shutdown(int nsig);
+
+static enum crmd_fsa_input handle_message(xmlNode *msg,
+                                          enum crmd_fsa_cause cause);
+static void handle_response(xmlNode *stored_msg);
+static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
+                                          enum crmd_fsa_cause cause);
+static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
+static void send_msg_via_ipc(xmlNode * msg, const char *sys);
+
+/* debug only, can wrap all it likes */
+static int last_data_id = 0;
+
+void
+register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
+                       fsa_data_t * cur_data, void *new_data, const char *raised_from)
+{
+    /* save the current actions if any */
+    if (controld_globals.fsa_actions != A_NOTHING) {
+        register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
+                               I_NULL, cur_data ?
cur_data->data : NULL, + controld_globals.fsa_actions, TRUE, __func__); + } + + /* reset the action list */ + crm_info("Resetting the current action list"); + fsa_dump_actions(controld_globals.fsa_actions, "Drop"); + controld_globals.fsa_actions = A_NOTHING; + + /* register the error */ + register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); +} + +void +register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, + void *data, uint64_t with_actions, + gboolean prepend, const char *raised_from) +{ + unsigned old_len = g_list_length(controld_globals.fsa_message_queue); + fsa_data_t *fsa_data = NULL; + + if (raised_from == NULL) { + raised_from = ""; + } + + if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { + /* no point doing anything */ + crm_err("Cannot add entry to queue: no input and no action"); + return; + } + + if (input == I_WAIT_FOR_EVENT) { + controld_set_global_flags(controld_fsa_is_stalled); + crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", + raised_from, fsa_cause2string(cause), data, old_len); + + if (old_len > 0) { + fsa_dump_queue(LOG_TRACE); + prepend = FALSE; + } + + if (data == NULL) { + controld_set_fsa_action_flags(with_actions); + fsa_dump_actions(with_actions, "Restored"); + return; + } + + /* Store everything in the new event and reset + * controld_globals.fsa_actions + */ + with_actions |= controld_globals.fsa_actions; + controld_globals.fsa_actions = A_NOTHING; + } + + last_data_id++; + crm_trace("%s %s FSA input %d (%s) due to %s, %s data", + raised_from, (prepend? "prepended" : "appended"), last_data_id, + fsa_input2string(input), fsa_cause2string(cause), + (data? "with" : "without")); + + fsa_data = calloc(1, sizeof(fsa_data_t)); + fsa_data->id = last_data_id; + fsa_data->fsa_input = input; + fsa_data->fsa_cause = cause; + fsa_data->origin = raised_from; + fsa_data->data = NULL; + fsa_data->data_type = fsa_dt_none; + fsa_data->actions = with_actions; + + if (with_actions != A_NOTHING) { + crm_trace("Adding actions %.16llx to input", + (unsigned long long) with_actions); + } + + if (data != NULL) { + switch (cause) { + case C_FSA_INTERNAL: + case C_CRMD_STATUS_CALLBACK: + case C_IPC_MESSAGE: + case C_HA_MESSAGE: + CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, + crm_err("Bogus data from %s", raised_from)); + crm_trace("Copying %s data from %s as cluster message data", + fsa_cause2string(cause), raised_from); + fsa_data->data = copy_ha_msg_input(data); + fsa_data->data_type = fsa_dt_ha_msg; + break; + + case C_LRM_OP_CALLBACK: + crm_trace("Copying %s data from %s as lrmd_event_data_t", + fsa_cause2string(cause), raised_from); + fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); + fsa_data->data_type = fsa_dt_lrm; + break; + + case C_TIMER_POPPED: + case C_SHUTDOWN: + case C_UNKNOWN: + case C_STARTUP: + crm_crit("Copying %s data (from %s) is not yet implemented", + fsa_cause2string(cause), raised_from); + crmd_exit(CRM_EX_SOFTWARE); + break; + } + } + + /* make sure to free it properly later */ + if (prepend) { + controld_globals.fsa_message_queue + = g_list_prepend(controld_globals.fsa_message_queue, fsa_data); + } else { + controld_globals.fsa_message_queue + = g_list_append(controld_globals.fsa_message_queue, fsa_data); + } + + crm_trace("FSA message queue length is %d", + g_list_length(controld_globals.fsa_message_queue)); + + /* fsa_dump_queue(LOG_TRACE); */ + + if (old_len == g_list_length(controld_globals.fsa_message_queue)) { + 
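/* The queue should now be one entry longer than at function entry; an
+         * unchanged length means the new input was silently dropped.
+         */
+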
crm_err("Couldn't add message to the queue"); + } + + if (input != I_WAIT_FOR_EVENT) { + controld_trigger_fsa(); + } +} + +void +fsa_dump_queue(int log_level) +{ + int offset = 0; + + for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; + iter = iter->next) { + fsa_data_t *data = (fsa_data_t *) iter->data; + + do_crm_log_unlikely(log_level, + "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", + offset++, data->id, fsa_input2string(data->fsa_input), + data->origin, data->data, data->data_type, + fsa_cause2string(data->fsa_cause)); + } +} + +ha_msg_input_t * +copy_ha_msg_input(ha_msg_input_t * orig) +{ + ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t)); + + CRM_ASSERT(copy != NULL); + copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL; + copy->xml = get_message_xml(copy->msg, F_CRM_DATA); + return copy; +} + +void +delete_fsa_input(fsa_data_t * fsa_data) +{ + lrmd_event_data_t *op = NULL; + xmlNode *foo = NULL; + + if (fsa_data == NULL) { + return; + } + crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); + + if (fsa_data->data != NULL) { + switch (fsa_data->data_type) { + case fsa_dt_ha_msg: + delete_ha_msg_input(fsa_data->data); + break; + + case fsa_dt_xml: + foo = fsa_data->data; + free_xml(foo); + break; + + case fsa_dt_lrm: + op = (lrmd_event_data_t *) fsa_data->data; + lrmd_free_event(op); + break; + + case fsa_dt_none: + if (fsa_data->data != NULL) { + crm_err("Don't know how to free %s data from %s", + fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); + crmd_exit(CRM_EX_SOFTWARE); + } + break; + } + crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); + } + + free(fsa_data); +} + +/* returns the next message */ +fsa_data_t * +get_message(void) +{ + fsa_data_t *message + = (fsa_data_t *) controld_globals.fsa_message_queue->data; + + controld_globals.fsa_message_queue + = g_list_remove(controld_globals.fsa_message_queue, message); + crm_trace("Processing input %d", message->id); + return message; +} + +void * +fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) +{ + void *ret_val = NULL; + + if (fsa_data == NULL) { + crm_err("%s: No FSA data available", caller); + + } else if (fsa_data->data == NULL) { + crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); + + } else if (fsa_data->data_type != a_type) { + crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", + caller, fsa_data->data_type, a_type, fsa_data->origin); + CRM_ASSERT(fsa_data->data_type == a_type); + } else { + ret_val = fsa_data->data; + } + + return ret_val; +} + +/* A_MSG_ROUTE */ +void +do_msg_route(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + + route_message(msg_data->fsa_cause, input->msg); +} + +void +route_message(enum crmd_fsa_cause cause, xmlNode * input) +{ + ha_msg_input_t fsa_input; + enum crmd_fsa_input result = I_NULL; + + fsa_input.msg = input; + CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); + + /* try passing the buck first */ + if (relay_message(input, cause == C_IPC_MESSAGE)) { + return; + } + + /* handle locally */ + result = handle_message(input, cause); + + /* done or process later? 
     */
+    switch (result) {
+        case I_NULL:
+        case I_CIB_OP:
+        case I_ROUTER:
+        case I_NODE_JOIN:
+        case I_JOIN_REQUEST:
+        case I_JOIN_RESULT:
+            break;
+        default:
+            /* Deferring local processing of message */
+            register_fsa_input_later(cause, result, &fsa_input);
+            return;
+    }
+
+    if (result != I_NULL) {
+        /* add to the queue */
+        register_fsa_input(cause, result, &fsa_input);
+    }
+}
+
+gboolean
+relay_message(xmlNode * msg, gboolean originated_locally)
+{
+    int dest = 1;
+    bool is_for_dc = false;
+    bool is_for_dcib = false;
+    bool is_for_te = false;
+    bool is_for_crm = false;
+    bool is_for_cib = false;
+    bool is_local = false;
+    const char *host_to = NULL;
+    const char *sys_to = NULL;
+    const char *sys_from = NULL;
+    const char *type = NULL;
+    const char *task = NULL;
+    const char *ref = NULL;
+
+    /* Check for NULL before extracting any message fields */
+    if (msg == NULL) {
+        crm_warn("Cannot route empty message");
+        return TRUE;
+    }
+
+    host_to = crm_element_value(msg, F_CRM_HOST_TO);
+    sys_to = crm_element_value(msg, F_CRM_SYS_TO);
+    sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
+    type = crm_element_value(msg, F_TYPE);
+    task = crm_element_value(msg, F_CRM_TASK);
+    ref = crm_element_value(msg, XML_ATTR_REFERENCE);
+
+    if (ref == NULL) {
+        ref = "without reference ID";
+    }
+
+    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
+        crm_trace("No routing needed for hello message %s", ref);
+        return TRUE;
+
+    } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
+        crm_warn("Received invalid message %s: type '%s' not '" T_CRM "'",
+                 ref, pcmk__s(type, ""));
+        crm_log_xml_warn(msg, "[bad message type]");
+        return TRUE;
+
+    } else if (sys_to == NULL) {
+        crm_warn("Received invalid message %s: no subsystem", ref);
+        crm_log_xml_warn(msg, "[no subsystem]");
+        return TRUE;
+    }
+
+    is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
+    is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
+    is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
+    is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
+    is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
+
+    is_local = false;
+    if (pcmk__str_empty(host_to)) {
+        if (is_for_dc || is_for_te) {
+            is_local = false;
+
+        } else if (is_for_crm) {
+            if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
+                                     PCMK__CONTROLD_CMD_NODES, NULL)) {
+                /* Node info requests do not specify a host, which is normally
+                 * treated as "all hosts", because the whole point is that the
+                 * client may not know the local node name. Always handle these
+                 * requests locally.
+                 */
+                is_local = true;
+            } else {
+                is_local = !originated_locally;
+            }
+
+        } else {
+            is_local = true;
+        }
+
+    } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
+                            pcmk__str_casei)) {
+        is_local = true;
+    } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
+        xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
+        const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
+
+        if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+            // Local delete of an offline node's resource history
+            is_local = true;
+        }
+    }
+
+    if (is_for_dc || is_for_dcib || is_for_te) {
+        if (AM_I_DC && is_for_te) {
+            crm_trace("Route message %s locally as transition request", ref);
+            send_msg_via_ipc(msg, sys_to);
+
+        } else if (AM_I_DC) {
+            crm_trace("Route message %s locally as DC request", ref);
+            return FALSE; // More to be done by caller
+
+        } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
+                                                               CRM_SYSTEM_TENGINE, NULL)) {
+
+            if (is_corosync_cluster()) {
+                dest = text2msg_type(sys_to);
+            }
+            crm_trace("Relay message %s to DC", ref);
+            send_cluster_message(host_to ?
crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); + + } else { + /* Neither the TE nor the scheduler should be sending messages + * to DCs on other nodes. By definition, if we are no longer the DC, + * then the scheduler's or TE's data should be discarded. + */ + crm_trace("Discard message %s because we are not DC", ref); + } + + } else if (is_local && (is_for_crm || is_for_cib)) { + crm_trace("Route message %s locally as controller request", ref); + return FALSE; // More to be done by caller + + } else if (is_local) { + crm_trace("Relay message %s locally to %s", + ref, (sys_to? sys_to : "unknown client")); + crm_log_xml_trace(msg, "[IPC relay]"); + send_msg_via_ipc(msg, sys_to); + + } else { + crm_node_t *node_to = NULL; + + if (is_corosync_cluster()) { + dest = text2msg_type(sys_to); + + if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { + dest = crm_msg_crmd; + } + } + + if (host_to) { + node_to = pcmk__search_cluster_node_cache(0, host_to); + if (node_to == NULL) { + crm_warn("Cannot route message %s: Unknown node %s", + ref, host_to); + return TRUE; + } + crm_trace("Relay message %s to %s", + ref, (node_to->uname? node_to->uname : "peer")); + } else { + crm_trace("Broadcast message %s to all peers", ref); + } + send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE); + } + + return TRUE; // No further processing of message is needed +} + +// Return true if field contains a positive integer +static bool +authorize_version(xmlNode *message_data, const char *field, + const char *client_name, const char *ref, const char *uuid) +{ + const char *version = crm_element_value(message_data, field); + long long version_num; + + if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok) + || (version_num < 0LL)) { + + crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s " + CRM_XS " ref=%s uuid=%s", + client_name, ((version == NULL)? "" : version), + field, (ref? ref : "none"), uuid); + return false; + } + return true; +} + +/*! + * \internal + * \brief Check whether a client IPC message is acceptable + * + * If a given client IPC message is a hello, "authorize" it by ensuring it has + * valid information such as a protocol version, and return false indicating + * that nothing further needs to be done with the message. If the message is not + * a hello, just return true to indicate it needs further processing. + * + * \param[in] client_msg XML of IPC message + * \param[in,out] curr_client If IPC is not proxied, client that sent message + * \param[in] proxy_session If IPC is proxied, the session ID + * + * \return true if message needs further processing, false if it doesn't + */ +bool +controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client, + const char *proxy_session) +{ + xmlNode *message_data = NULL; + const char *client_name = NULL; + const char *op = crm_element_value(client_msg, F_CRM_TASK); + const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE); + const char *uuid = (curr_client? curr_client->id : proxy_session); + + if (uuid == NULL) { + crm_warn("IPC message from client rejected: No client identifier " + CRM_XS " ref=%s", (ref? 
ref : "none")); + goto rejected; + } + + if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) { + // Only hello messages need to be authorized + return true; + } + + message_data = get_message_xml(client_msg, F_CRM_DATA); + + client_name = crm_element_value(message_data, "client_name"); + if (pcmk__str_empty(client_name)) { + crm_warn("IPC hello from client rejected: No client name", + CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); + goto rejected; + } + if (!authorize_version(message_data, "major_version", client_name, ref, + uuid)) { + goto rejected; + } + if (!authorize_version(message_data, "minor_version", client_name, ref, + uuid)) { + goto rejected; + } + + crm_trace("Validated IPC hello from client %s", client_name); + if (curr_client) { + curr_client->userdata = strdup(client_name); + } + controld_trigger_fsa(); + return false; + +rejected: + if (curr_client) { + qb_ipcs_disconnect(curr_client->ipcs); + } + return false; +} + +static enum crmd_fsa_input +handle_message(xmlNode *msg, enum crmd_fsa_cause cause) +{ + const char *type = NULL; + + CRM_CHECK(msg != NULL, return I_NULL); + + type = crm_element_value(msg, F_CRM_MSG_TYPE); + if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) { + return handle_request(msg, cause); + + } else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) { + handle_response(msg); + return I_NULL; + } + + crm_err("Unknown message type: %s", type); + return I_NULL; +} + +static enum crmd_fsa_input +handle_failcount_op(xmlNode * stored_msg) +{ + const char *rsc = NULL; + const char *uname = NULL; + const char *op = NULL; + char *interval_spec = NULL; + guint interval_ms = 0; + gboolean is_remote_node = FALSE; + xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA); + + if (xml_op) { + xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE); + xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS); + + if (xml_rsc) { + rsc = ID(xml_rsc); + } + if (xml_attrs) { + op = crm_element_value(xml_attrs, + CRM_META "_" XML_RSC_ATTR_CLEAR_OP); + crm_element_value_ms(xml_attrs, + CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL, + &interval_ms); + } + } + uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET); + + if ((rsc == NULL) || (uname == NULL)) { + crm_log_xml_warn(stored_msg, "invalid failcount op"); + return I_NULL; + } + + if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) { + is_remote_node = TRUE; + } + + crm_debug("Clearing failures for %s-interval %s on %s " + "from attribute manager, CIB, and executor state", + pcmk__readable_interval(interval_ms), rsc, uname); + + if (interval_ms) { + interval_spec = crm_strdup_printf("%ums", interval_ms); + } + update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node); + free(interval_spec); + + controld_cib_delete_last_failure(rsc, uname, op, interval_ms); + + lrm_clear_last_failure(rsc, uname, op, interval_ms); + + return I_NULL; +} + +static enum crmd_fsa_input +handle_lrm_delete(xmlNode *stored_msg) +{ + const char *mode = NULL; + xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA); + + CRM_CHECK(msg_data != NULL, return I_NULL); + + /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to + * relay the operation to the affected node, which will unregister the + * resource from the local executor, clear the resource's history from the + * CIB, and do some bookkeeping in the controller. 
+     *
+     * However, if the affected node is offline, the client will specify
+     * mode="cib" which means the controller receiving the operation should
+     * clear the resource's history from the CIB and nothing else. This is used
+     * to clear shutdown locks.
+     */
+    mode = crm_element_value(msg_data, PCMK__XA_MODE);
+    if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+        // Relay to affected node
+        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
+        return I_ROUTER;
+
+    } else {
+        // Delete CIB history locally (compare with do_lrm_delete())
+        const char *from_sys = NULL;
+        const char *user_name = NULL;
+        const char *rsc_id = NULL;
+        const char *node = NULL;
+        xmlNode *rsc_xml = NULL;
+        int rc = pcmk_rc_ok;
+
+        rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
+        CRM_CHECK(rsc_xml != NULL, return I_NULL);
+
+        rsc_id = ID(rsc_xml);
+        from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
+        node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
+        user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
+        crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
+                  "(clearing CIB resource history only)", rsc_id, node,
+                  (user_name? " for user " : ""), (user_name? user_name : ""));
+        rc = controld_delete_resource_history(rsc_id, node, user_name,
+                                              cib_dryrun|cib_sync_call);
+        if (rc == pcmk_rc_ok) {
+            rc = controld_delete_resource_history(rsc_id, node, user_name,
+                                                  crmd_cib_smart_opt());
+        }
+
+        // Notify client and tengine (tengine is notified only for mode="cib"
+        // CRM_OP_LRM_DELETE requests like this one)
+        if (from_sys) {
+            lrmd_event_data_t *op = NULL;
+            const char *from_host = crm_element_value(stored_msg,
+                                                      F_CRM_HOST_FROM);
+            const char *transition;
+
+            if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
+                transition = crm_element_value(msg_data,
+                                               XML_ATTR_TRANSITION_KEY);
+            } else {
+                transition = crm_element_value(stored_msg,
+                                               XML_ATTR_TRANSITION_KEY);
+            }
+
+            crm_info("Notifying %s on %s that %s was%s deleted",
+                     from_sys, (from_host? from_host : "local node"), rsc_id,
+                     ((rc == pcmk_rc_ok)? "" : " not"));
+            op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
+            op->type = lrmd_event_exec_complete;
+            op->user_data = strdup(transition? transition : FAKE_TE_ID);
+            op->params = pcmk__strkey_table(free, free);
+            g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
+                                strdup(CRM_FEATURE_SET));
+            controld_rc2event(op, rc);
+            controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
+            lrmd_free_event(op);
+            controld_trigger_delete_refresh(from_sys, rsc_id);
+        }
+        return I_NULL;
+    }
+}
+
+/*!
+ * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
+ *
+ * \param[in] msg  Message XML
+ *
+ * \return Next FSA input
+ */
+static enum crmd_fsa_input
+handle_remote_state(const xmlNode *msg)
+{
+    const char *conn_host = NULL;
+    const char *remote_uname = ID(msg);
+    crm_node_t *remote_peer;
+    bool remote_is_up = false;
+    int rc = pcmk_rc_ok;
+
+    rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up);
+
+    CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
+
+    remote_peer = crm_remote_peer_get(remote_uname);
+    CRM_CHECK(remote_peer, return I_NULL);
+
+    pcmk__update_peer_state(__func__, remote_peer,
+                            remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
+                            0);
+
+    conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
+    if (conn_host) {
+        pcmk__str_update(&remote_peer->conn_host, conn_host);
+    } else if (remote_peer->conn_host) {
+        free(remote_peer->conn_host);
+        remote_peer->conn_host = NULL;
+    }
+
+    return I_NULL;
+}
+
+/*!
+ * \brief Handle a CRM_OP_PING message + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_ping(const xmlNode *msg) +{ + const char *value = NULL; + xmlNode *ping = NULL; + xmlNode *reply = NULL; + + // Build reply + + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + + // Add controller state + value = fsa_state2string(controld_globals.fsa_state); + crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value); + crm_notice("Current ping state: %s", value); // CTS needs this + + // Add controller health + // @TODO maybe do some checks to determine meaningful status + crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + + // Send reply + reply = create_reply(msg, ping); + free_xml(ping); + if (reply != NULL) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +/*! + * \brief Handle a PCMK__CONTROLD_CMD_NODES message + * + * \param[in] request Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_node_list(const xmlNode *request) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + xmlNode *reply = NULL; + xmlNode *reply_data = NULL; + + // Create message data for reply + reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES); + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { + xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE); + + crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t + crm_xml_add(xml, XML_ATTR_UNAME, node->uname); + crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state); + } + + // Create and send reply + reply = create_reply(request, reply_data); + free_xml(reply_data); + if (reply) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +/*! 
+ * \brief Handle a CRM_OP_NODE_INFO request + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_node_info_request(const xmlNode *msg) +{ + const char *value = NULL; + crm_node_t *node = NULL; + int node_id = 0; + xmlNode *reply = NULL; + xmlNode *reply_data = NULL; + + // Build reply + + reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE); + crm_xml_add(reply_data, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD); + + // Add whether current partition has quorum + pcmk__xe_set_bool_attr(reply_data, XML_ATTR_HAVE_QUORUM, + pcmk_is_set(controld_globals.flags, + controld_has_quorum)); + + // Check whether client requested node info by ID and/or name + crm_element_value_int(msg, XML_ATTR_ID, &node_id); + if (node_id < 0) { + node_id = 0; + } + value = crm_element_value(msg, XML_ATTR_UNAME); + + // Default to local node if none given + if ((node_id == 0) && (value == NULL)) { + value = controld_globals.our_nodename; + } + + node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY); + if (node) { + crm_xml_add(reply_data, XML_ATTR_ID, node->uuid); + crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname); + crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state); + pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE, + pcmk_is_set(node->flags, crm_remote_node)); + } + + // Send reply + reply = create_reply(msg, reply_data); + free_xml(reply_data); + if (reply != NULL) { + (void) relay_message(reply, TRUE); + free_xml(reply); + } + + // Nothing further to do + return I_NULL; +} + +static void +verify_feature_set(xmlNode *msg) +{ + const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION); + + if (dc_version == NULL) { + /* All we really know is that the DC feature set is older than 3.1.0, + * but that's also all that really matters. + */ + dc_version = "3.0.14"; + } + + if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { + crm_trace("Local feature set (%s) is compatible with DC's (%s)", + CRM_FEATURE_SET, dc_version); + } else { + crm_err("Local feature set (%s) is incompatible with DC's (%s)", + CRM_FEATURE_SET, dc_version); + + // Nothing is likely to improve without administrator involvement + controld_set_fsa_input_flags(R_STAYDOWN); + crmd_exit(CRM_EX_FATAL); + } +} + +// DC gets own shutdown all-clear +static enum crmd_fsa_input +handle_shutdown_self_ack(xmlNode *stored_msg) +{ + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + // The expected case -- we initiated own shutdown sequence + crm_info("Shutting down controller"); + return I_STOP; + } + + if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) { + // Must be logic error -- DC confirming its own unrequested shutdown + crm_err("Shutting down controller immediately due to " + "unexpected shutdown confirmation"); + return I_TERMINATE; + } + + if (controld_globals.fsa_state != S_STOPPING) { + // Shouldn't happen -- non-DC confirming unrequested shutdown + crm_err("Starting new DC election because %s is " + "confirming shutdown we did not request", + (host_from? host_from : "another node")); + return I_ELECTION; + } + + // Shouldn't happen, but we are already stopping anyway + crm_debug("Ignoring unexpected shutdown confirmation from %s", + (host_from? 
host_from : "another node")); + return I_NULL; +} + +// Non-DC gets shutdown all-clear from DC +static enum crmd_fsa_input +handle_shutdown_ack(xmlNode *stored_msg) +{ + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (host_from == NULL) { + crm_warn("Ignoring shutdown request without origin specified"); + return I_NULL; + } + + if (pcmk__str_eq(host_from, controld_globals.dc_name, + pcmk__str_null_matches|pcmk__str_casei)) { + + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_info("Shutting down controller after confirmation from %s", + host_from); + } else { + crm_err("Shutting down controller after unexpected " + "shutdown request from %s", host_from); + controld_set_fsa_input_flags(R_STAYDOWN); + } + return I_STOP; + } + + crm_warn("Ignoring shutdown request from %s because DC is %s", + host_from, controld_globals.dc_name); + return I_NULL; +} + +static enum crmd_fsa_input +handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) +{ + xmlNode *msg = NULL; + const char *op = crm_element_value(stored_msg, F_CRM_TASK); + + /* Optimize this for the DC - it has the most to do */ + + if (op == NULL) { + crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]"); + return I_NULL; + } + + if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + crm_node_t *node = pcmk__search_cluster_node_cache(0, from); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + if(AM_I_DC == FALSE) { + return I_NULL; /* Done */ + } + } + + /*========== DC-Only Actions ==========*/ + if (AM_I_DC) { + if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { + return I_NODE_JOIN; + + } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { + return I_JOIN_REQUEST; + + } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { + return I_JOIN_RESULT; + + } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { + return handle_shutdown_self_ack(stored_msg); + + } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + // Another controller wants to shut down its node + return handle_shutdown_request(stored_msg); + } + } + + /*========== common actions ==========*/ + if (strcmp(op, CRM_OP_NOVOTE) == 0) { + ha_msg_input_t fsa_input; + + fsa_input.msg = stored_msg; + register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, + A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, + __func__); + + } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { + /* a remote connection host is letting us know the node state */ + return handle_remote_state(stored_msg); + + } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { + throttle_update(stored_msg); + if (AM_I_DC && (controld_globals.transition_graph != NULL) + && !controld_globals.transition_graph->complete) { + + crm_debug("The throttle changed. Trigger a graph."); + trigger_graph(); + } + return I_NULL; + + } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { + return handle_failcount_op(stored_msg); + + } else if (strcmp(op, CRM_OP_VOTE) == 0) { + /* count the vote and decide what to do after that */ + ha_msg_input_t fsa_input; + + fsa_input.msg = stored_msg; + register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, + A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, + __func__); + + /* Sometimes we _must_ go into S_ELECTION */ + if (controld_globals.fsa_state == S_HALT) { + crm_debug("Forcing an election from S_HALT"); + return I_ELECTION; +#if 0 + } else if (AM_I_DC) { + /* This is the old way of doing things but what is gained? 
*/ + return I_ELECTION; +#endif + } + + } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { + verify_feature_set(stored_msg); + crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); + return I_JOIN_OFFER; + + } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { + crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); + return I_JOIN_RESULT; + + } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { + return handle_lrm_delete(stored_msg); + + } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) + || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT + || (strcmp(op, CRM_OP_REPROBE) == 0)) { + + crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); + return I_ROUTER; + + } else if (strcmp(op, CRM_OP_NOOP) == 0) { + return I_NULL; + + } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { + + crm_shutdown(SIGTERM); + /*return I_SHUTDOWN; */ + return I_NULL; + + } else if (strcmp(op, CRM_OP_PING) == 0) { + return handle_ping(stored_msg); + + } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { + return handle_node_info_request(stored_msg); + + } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { + int id = 0; + const char *name = NULL; + + crm_element_value_int(stored_msg, XML_ATTR_ID, &id); + name = crm_element_value(stored_msg, XML_ATTR_UNAME); + + if(cause == C_IPC_MESSAGE) { + msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { + crm_err("Could not instruct peers to remove references to node %s/%u", name, id); + } else { + crm_notice("Instructing peers to remove references to node %s/%u", name, id); + } + free_xml(msg); + + } else { + reap_crm_member(id, name); + + /* If we're forgetting this node, also forget any failures to fence + * it, so we don't carry that over to any node added later with the + * same name. + */ + st_fail_count_reset(name); + } + + } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { + xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA); + + remote_ra_process_maintenance_nodes(xml); + + } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { + return handle_node_list(stored_msg); + + /*========== (NOT_DC)-Only Actions ==========*/ + } else if (!AM_I_DC) { + + if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { + return handle_shutdown_ack(stored_msg); + } + + } else { + crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? 
"the DC" : "non-DC node"); + crm_log_xml_err(stored_msg, "Unexpected"); + } + + return I_NULL; +} + +static void +handle_response(xmlNode *stored_msg) +{ + const char *op = crm_element_value(stored_msg, F_CRM_TASK); + + if (op == NULL) { + crm_log_xml_err(stored_msg, "Bad message"); + + } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { + // Check whether scheduler answer been superseded by subsequent request + const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); + + if (msg_ref == NULL) { + crm_err("%s - Ignoring calculation with no reference", op); + + } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, + pcmk__str_none)) { + ha_msg_input_t fsa_input; + + controld_stop_sched_timer(); + fsa_input.msg = stored_msg; + register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); + + } else { + crm_info("%s calculation %s is obsolete", op, msg_ref); + } + + } else if (strcmp(op, CRM_OP_VOTE) == 0 + || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { + + } else { + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + crm_err("Unexpected response (op=%s, src=%s) sent to the %s", + op, host_from, AM_I_DC ? "DC" : "controller"); + } +} + +static enum crmd_fsa_input +handle_shutdown_request(xmlNode * stored_msg) +{ + /* handle here to avoid potential version issues + * where the shutdown message/procedure may have + * been changed in later versions. + * + * This way the DC is always in control of the shutdown + */ + + char *now_s = NULL; + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + + if (host_from == NULL) { + /* we're shutting down and the DC */ + host_from = controld_globals.our_nodename; + } + + crm_info("Creating shutdown request for %s (state=%s)", host_from, + fsa_state2string(controld_globals.fsa_state)); + crm_log_xml_trace(stored_msg, "message"); + + now_s = pcmk__ttoa(time(NULL)); + update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); + free(now_s); + + /* will be picked up by the TE as long as its running */ + return I_NULL; +} + +static void +send_msg_via_ipc(xmlNode * msg, const char *sys) +{ + pcmk__client_t *client_channel = NULL; + + CRM_CHECK(sys != NULL, return); + + client_channel = pcmk__find_client_by_id(sys); + + if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { + crm_xml_add(msg, F_CRM_HOST_FROM, controld_globals.our_nodename); + } + + if (client_channel != NULL) { + /* Transient clients such as crmadmin */ + pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); + + } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + xmlNode *data = get_message_xml(msg, F_CRM_DATA); + + process_te_message(msg, data); + + } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) { + fsa_data_t fsa_data; + ha_msg_input_t fsa_input; + + fsa_input.msg = msg; + fsa_input.xml = get_message_xml(msg, F_CRM_DATA); + + fsa_data.id = 0; + fsa_data.actions = 0; + fsa_data.data = &fsa_input; + fsa_data.fsa_input = I_MESSAGE; + fsa_data.fsa_cause = C_IPC_MESSAGE; + fsa_data.origin = __func__; + fsa_data.data_type = fsa_dt_ha_msg; + + do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state, + I_MESSAGE, &fsa_data); + + } else if (crmd_is_proxy_session(sys)) { + crmd_proxy_send(sys, msg); + + } else { + crm_info("Received invalid request: unknown subsystem '%s'", sys); + } +} + +void +delete_ha_msg_input(ha_msg_input_t * orig) +{ + if (orig == NULL) { + return; + } + free_xml(orig->msg); + free(orig); +} + +/*! 
+ * \internal + * \brief Notify the cluster of a remote node state change + * + * \param[in] node_name Node's name + * \param[in] node_up true if node is up, false if down + */ +void +broadcast_remote_state_message(const char *node_name, bool node_up) +{ + xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL, + CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + crm_info("Notifying cluster of Pacemaker Remote node %s %s", + node_name, node_up? "coming up" : "going down"); + + crm_xml_add(msg, XML_ATTR_ID, node_name); + pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up); + + if (node_up) { + crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename); + } + + send_cluster_message(NULL, crm_msg_crmd, msg, TRUE); + free_xml(msg); +} + diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h new file mode 100644 index 0000000..4108961 --- /dev/null +++ b/daemons/controld/controld_messages.h @@ -0,0 +1,86 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef XML_CRM_MESSAGES__H +# define XML_CRM_MESSAGES__H + +# include +# include +# include +# include +# include + +typedef struct ha_msg_input_s { + xmlNode *msg; + xmlNode *xml; + +} ha_msg_input_t; + +extern void delete_ha_msg_input(ha_msg_input_t * orig); + +extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, + const char *caller); + +# define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __func__) + +extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, + fsa_data_t * cur_data, void *new_data, const char *raised_from); + +#define register_fsa_error(cause, input, new_data) \ + register_fsa_error_adv(cause, input, msg_data, new_data, __func__) + +void register_fsa_input_adv(enum crmd_fsa_cause cause, + enum crmd_fsa_input input, void *data, + uint64_t with_actions, gboolean prepend, + const char *raised_from); + +extern void fsa_dump_queue(int log_level); +extern void route_message(enum crmd_fsa_cause cause, xmlNode * input); + +# define crmd_fsa_stall(suppress) do { \ + if(suppress == FALSE && msg_data != NULL) { \ + register_fsa_input_adv( \ + ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT, \ + ((fsa_data_t*)msg_data)->data, action, TRUE, __func__); \ + } else { \ + register_fsa_input_adv( \ + C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \ + NULL, action, TRUE, __func__); \ + } \ + } while(0) + +#define register_fsa_input(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) + +#define register_fsa_input_before(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __func__) + +#define register_fsa_input_later(cause, input, data) \ + register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) + +void delete_fsa_input(fsa_data_t * fsa_data); + +fsa_data_t *get_message(void); + +extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally); + +gboolean crmd_is_proxy_session(const char *session); +void crmd_proxy_send(const char *session, xmlNode *msg); + +bool controld_authorize_ipc_message(const xmlNode *client_msg, + pcmk__client_t *curr_client, + const char *proxy_session); + +extern gboolean send_request(xmlNode * msg, char **msg_reference); + +extern ha_msg_input_t 
*copy_ha_msg_input(ha_msg_input_t * orig); + +void broadcast_remote_state_message(const char *node_name, bool node_up); + +#endif diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c new file mode 100644 index 0000000..240a978 --- /dev/null +++ b/daemons/controld/controld_metadata.c @@ -0,0 +1,320 @@ +/* + * Copyright 2017-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include + +#include +#include + +#include + +static void +ra_param_free(void *param) +{ + if (param) { + struct ra_param_s *p = (struct ra_param_s *) param; + + if (p->rap_name) { + free(p->rap_name); + } + free(param); + } +} + +static void +metadata_free(void *metadata) +{ + if (metadata) { + struct ra_metadata_s *md = (struct ra_metadata_s *) metadata; + + g_list_free_full(md->ra_params, ra_param_free); + free(metadata); + } +} + +GHashTable * +metadata_cache_new(void) +{ + return pcmk__strkey_table(free, metadata_free); +} + +void +metadata_cache_free(GHashTable *mdc) +{ + if (mdc) { + crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc)); + g_hash_table_destroy(mdc); + } +} + +void +metadata_cache_reset(GHashTable *mdc) +{ + if (mdc) { + crm_trace("Resetting metadata cache with %d members", + g_hash_table_size(mdc)); + g_hash_table_remove_all(mdc); + } +} + +static struct ra_param_s * +ra_param_from_xml(xmlNode *param_xml) +{ + const char *param_name = crm_element_value(param_xml, "name"); + struct ra_param_s *p; + + p = calloc(1, sizeof(struct ra_param_s)); + if (p == NULL) { + return NULL; + } + + p->rap_name = strdup(param_name); + if (p->rap_name == NULL) { + free(p); + return NULL; + } + + if (pcmk__xe_attr_is_true(param_xml, "reloadable")) { + controld_set_ra_param_flags(p, ra_param_reloadable); + } + + if (pcmk__xe_attr_is_true(param_xml, "unique")) { + controld_set_ra_param_flags(p, ra_param_unique); + } + + if (pcmk__xe_attr_is_true(param_xml, "private")) { + controld_set_ra_param_flags(p, ra_param_private); + } + return p; +} + +static void +log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version) +{ + if (pcmk__str_empty(ra_ocf_version)) { + crm_warn("%s does not advertise OCF version supported", ra_key); + + } else if (compare_version(ra_ocf_version, "2") >= 0) { + crm_warn("%s supports OCF version %s (this Pacemaker version supports " + PCMK_OCF_VERSION " and might not work properly with agent)", + ra_key, ra_ocf_version); + + } else if (compare_version(ra_ocf_version, PCMK_OCF_VERSION) > 0) { + crm_info("%s supports OCF version %s (this Pacemaker version supports " + PCMK_OCF_VERSION " and might not use all agent features)", + ra_key, ra_ocf_version); + + } else { + crm_debug("%s supports OCF version %s", ra_key, ra_ocf_version); + } +} + +struct ra_metadata_s * +controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc, + const char *metadata_str) +{ + char *key = NULL; + const char *reason = NULL; + xmlNode *metadata = NULL; + xmlNode *match = NULL; + struct ra_metadata_s *md = NULL; + bool any_private_params = false; + bool ocf1_1 = false; + + CRM_CHECK(mdc && rsc && metadata_str, return NULL); + + key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); + if (!key) { + reason = "Invalid resource agent standard or type"; + goto err; + } + + metadata = string2xml(metadata_str); + 
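+    /* Editor's note (illustrative, not part of the upstream file): the
+     * string parsed here is the agent's metadata XML. A hypothetical
+     * fragment of the shape consumed below -- a <version> element, an
+     * <actions> list checked for reload support, and a <parameters> list
+     * whose "unique", "private", and "reloadable" attributes feed
+     * ra_param_from_xml() -- might look like:
+     *
+     *     <resource-agent name="example">
+     *       <version>1.1</version>
+     *       <parameters>
+     *         <parameter name="password" private="1"/>
+     *         <parameter name="config" reloadable="1"/>
+     *       </parameters>
+     *       <actions>
+     *         <action name="reload-agent" timeout="20s"/>
+     *       </actions>
+     *     </resource-agent>
+     */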
if (!metadata) { + reason = "Metadata is not valid XML"; + goto err; + } + + md = calloc(1, sizeof(struct ra_metadata_s)); + if (md == NULL) { + reason = "Could not allocate memory"; + goto err; + } + + if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { + xmlChar *content = NULL; + xmlNode *version_element = first_named_child(metadata, "version"); + + if (version_element != NULL) { + content = xmlNodeGetContent(version_element); + } + log_ra_ocf_version(key, (const char *) content); + if (content != NULL) { + ocf1_1 = (compare_version((const char *) content, "1.1") >= 0); + xmlFree(content); + } + } + + // Check supported actions + match = first_named_child(metadata, "actions"); + for (match = first_named_child(match, "action"); match != NULL; + match = crm_next_same_xml(match)) { + + const char *action_name = crm_element_value(match, "name"); + + if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT, + pcmk__str_none)) { + if (ocf1_1) { + controld_set_ra_flags(md, key, ra_supports_reload_agent); + } else { + crm_notice("reload-agent action will not be used with %s " + "because it does not support OCF 1.1 or later", key); + } + + } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD, + pcmk__str_casei)) { + controld_set_ra_flags(md, key, ra_supports_legacy_reload); + } + } + + // Build a parameter list + match = first_named_child(metadata, "parameters"); + for (match = first_named_child(match, "parameter"); match != NULL; + match = crm_next_same_xml(match)) { + + const char *param_name = crm_element_value(match, "name"); + + if (param_name == NULL) { + crm_warn("Metadata for %s:%s:%s has parameter without a name", + rsc->standard, rsc->provider, rsc->type); + } else { + struct ra_param_s *p = ra_param_from_xml(match); + + if (p == NULL) { + reason = "Could not allocate memory"; + goto err; + } + if (pcmk_is_set(p->rap_flags, ra_param_private)) { + any_private_params = true; + } + md->ra_params = g_list_prepend(md->ra_params, p); + } + } + + /* Newer resource agents support the "private" parameter attribute to + * indicate sensitive parameters. For backward compatibility with older + * agents, implicitly treat a few common names as private when the agent + * doesn't specify any explicitly. + */ + if (!any_private_params) { + for (GList *iter = md->ra_params; iter != NULL; iter = iter->next) { + struct ra_param_s *p = iter->data; + + if (pcmk__str_any_of(p->rap_name, "password", "passwd", "user", + NULL)) { + controld_set_ra_param_flags(p, ra_param_private); + } + } + } + + g_hash_table_replace(mdc, key, md); + free_xml(metadata); + return md; + +err: + crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s", + rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), + pcmk__s(rsc->provider, ""), rsc->type, reason); + free(key); + free_xml(metadata); + metadata_free(md); + return NULL; +} + +/*! 
+ * \internal + * \brief Get meta-data for a resource + * + * \param[in,out] lrm_state Use meta-data cache from this executor connection + * \param[in] rsc Resource to get meta-data for + * \param[in] source Allowed meta-data sources (bitmask of + * enum controld_metadata_source_e values) + * + * \return Meta-data cache entry for given resource, or NULL if not available + */ +struct ra_metadata_s * +controld_get_rsc_metadata(lrm_state_t *lrm_state, const lrmd_rsc_info_t *rsc, + uint32_t source) +{ + struct ra_metadata_s *metadata = NULL; + char *metadata_str = NULL; + char *key = NULL; + int rc = pcmk_ok; + + CRM_CHECK((lrm_state != NULL) && (rsc != NULL), return NULL); + + if (pcmk_is_set(source, controld_metadata_from_cache)) { + key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); + if (key != NULL) { + metadata = g_hash_table_lookup(lrm_state->metadata_cache, key); + free(key); + } + if (metadata != NULL) { + crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type); + return metadata; + } + } + + if (!pcmk_is_set(source, controld_metadata_from_agent)) { + return NULL; + } + + /* For most actions, metadata was cached asynchronously before action + * execution (via metadata_complete()). + * + * However if that failed, and for other actions, retrieve the metadata now + * via a local, synchronous, direct execution of the agent. + * + * This has multiple issues, which is why this is just a fallback: the + * executor should execute agents, not the controller; metadata for + * Pacemaker Remote nodes should be collected on those nodes, not locally; + * the metadata call shouldn't eat into the timeout of the real action being + * performed; and the synchronous call blocks the controller (which also + * means that if the metadata action tries to contact the controller, + * everything will hang until the timeout). + */ + crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type); + rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, + rsc->type, &metadata_str, 0); + if (rc != pcmk_ok) { + crm_warn("Failed to get metadata for %s (%s%s%s:%s): %s", + rsc->id, rsc->standard, + ((rsc->provider == NULL)? "" : ":"), + ((rsc->provider == NULL)? "" : rsc->provider), + rsc->type, pcmk_strerror(rc)); + return NULL; + } + + metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc, + metadata_str); + free(metadata_str); + return metadata; +} diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h new file mode 100644 index 0000000..12ea327 --- /dev/null +++ b/daemons/controld/controld_metadata.h @@ -0,0 +1,96 @@ +/* + * Copyright 2017-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#ifndef CRMD_METADATA_H +#define CRMD_METADATA_H + +#include // uint32_t +#include // GList, GHashTable +#include "controld_lrm.h" // lrm_state_t, lrm_rsc_info_t + +/* + * @COMPAT pre-OCF-1.1 resource agents + * + * Pacemaker previously used the "reload" action to reload agent parameters, + * but most agents used it to reload the service configuration. 
Pacemaker also
+ * misused the OCF 1.0 "unique" parameter attribute to indicate reloadability.
+ *
+ * OCF 1.1 created the "reload-agent" action and "reloadable" parameter
+ * attribute for the Pacemaker usage.
+ *
+ * Pacemaker now supports the OCF 1.1 usage. The old usage is now deprecated,
+ * but will be supported if the agent does not claim OCF 1.1 or later
+ * compliance and does not advertise the reload-agent action.
+ */
+enum ra_flags_e {
+    ra_supports_legacy_reload   = (1 << 0),
+    ra_supports_reload_agent    = (1 << 1),
+};
+
+enum ra_param_flags_e {
+    ra_param_unique             = (1 << 0),
+    ra_param_private            = (1 << 1),
+    ra_param_reloadable         = (1 << 2),
+};
+
+// Allowed sources of resource agent meta-data when requesting it
+enum controld_metadata_source_e {
+    controld_metadata_from_cache    = (1 << 0),
+    controld_metadata_from_agent    = (1 << 1),
+};
+
+struct ra_param_s {
+    char *rap_name;
+    uint32_t rap_flags;         // bitmask of enum ra_param_flags_e
+};
+
+struct ra_metadata_s {
+    GList *ra_params;           // ra_param_s
+    uint32_t ra_flags;          // bitmask of enum ra_flags_e
+};
+
+#define controld_set_ra_flags(ra_md, ra_key, flags_to_set) do {             \
+        (ra_md)->ra_flags = pcmk__set_flags_as(__func__, __LINE__,          \
+            LOG_TRACE, "Resource agent", ra_key,                            \
+            (ra_md)->ra_flags, (flags_to_set), #flags_to_set);              \
+    } while (0)
+
+#define controld_set_ra_param_flags(ra_param, flags_to_set) do {            \
+        (ra_param)->rap_flags = pcmk__set_flags_as(__func__, __LINE__,      \
+            LOG_TRACE, "Resource agent parameter", (ra_param)->rap_name,    \
+            (ra_param)->rap_flags, (flags_to_set), #flags_to_set);          \
+    } while (0)
+
+GHashTable *metadata_cache_new(void);
+void metadata_cache_free(GHashTable *mdc);
+void metadata_cache_reset(GHashTable *mdc);
+
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
+                                              const lrmd_rsc_info_t *rsc,
+                                              const char *metadata_str);
+struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
+                                                const lrmd_rsc_info_t *rsc,
+                                                uint32_t source);
+
+static inline const char *
+ra_param_flag2text(enum ra_param_flags_e flag)
+{
+    switch (flag) {
+        case ra_param_reloadable:
+            return "reloadable";
+        case ra_param_unique:
+            return "unique";
+        case ra_param_private:
+            return "private";
+        default:
+            return "unknown";
+    }
+}
+
+#endif
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
new file mode 100644
index 0000000..f24b755
--- /dev/null
+++ b/daemons/controld/controld_remote_ra.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2013-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define REMOTE_LRMD_RA "remote"
+
+/* The max start timeout before cmd retry */
+#define MAX_START_TIMEOUT_MS 10000
+
+#define cmd_set_flags(cmd, flags_to_set) do {                               \
+        (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,   \
+            "Remote command", (cmd)->rsc_id, (cmd)->status,                 \
+            (flags_to_set), #flags_to_set);                                 \
+    } while (0)
+
+#define cmd_clear_flags(cmd, flags_to_clear) do {                           \
+        (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+            "Remote command", (cmd)->rsc_id, (cmd)->status,                 \
+            (flags_to_clear), #flags_to_clear);                             \
+    } while (0)
+
+enum remote_cmd_status {
+    cmd_reported_success   = (1 << 0),
+    cmd_cancel             = (1 << 1),
+};
+
+typedef struct remote_ra_cmd_s {
+    /*!
the local node the cmd is issued from */ + char *owner; + /*! the remote node the cmd is executed on */ + char *rsc_id; + /*! the action to execute */ + char *action; + /*! some string the client wants us to give it back */ + char *userdata; + /*! start delay in ms */ + int start_delay; + /*! timer id used for start delay. */ + int delay_id; + /*! timeout in ms for cmd */ + int timeout; + int remaining_timeout; + /*! recurring interval in ms */ + guint interval_ms; + /*! interval timer id */ + int interval_id; + int monitor_timeout_id; + int takeover_timeout_id; + /*! action parameters */ + lrmd_key_value_t *params; + pcmk__action_result_t result; + int call_id; + time_t start_time; + uint32_t status; +} remote_ra_cmd_t; + +#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \ + lrm_state_t *lrm = (lrm_state); \ + remote_ra_data_t *ra = lrm->remote_ra_data; \ + ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ + lrm->node_name, ra->status, \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \ + lrm_state_t *lrm = (lrm_state); \ + remote_ra_data_t *ra = lrm->remote_ra_data; \ + ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ + lrm->node_name, ra->status, \ + (flags_to_clear), #flags_to_clear); \ + } while (0) + +enum remote_status { + expect_takeover = (1 << 0), + takeover_complete = (1 << 1), + remote_active = (1 << 2), + /* Maintenance mode is difficult to determine from the controller's context, + * so we have it signalled back with the transition from the scheduler. + */ + remote_in_maint = (1 << 3), + /* Similar for whether we are controlling a guest node or remote node. + * Fortunately there is a meta-attribute in the transition already and + * as the situation doesn't change over time we can use the + * resource start for noting down the information for later use when + * the attributes aren't at hand. 
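+     *
+     * Editor's note: these bits are flipped via the lrm_remote_set_flags()
+     * and lrm_remote_clear_flags() macros defined above, so that every
+     * change is trace-logged, e.g.:
+     *
+     *     lrm_remote_set_flags(lrm_state, expect_takeover);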
+ */ + controlling_guest = (1 << 4), +}; + +typedef struct remote_ra_data_s { + crm_trigger_t *work; + remote_ra_cmd_t *cur_cmd; + GList *cmds; + GList *recurring_cmds; + uint32_t status; +} remote_ra_data_t; + +static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); +static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); +static GList *fail_all_monitor_cmds(GList * list); + +static void +free_cmd(gpointer user_data) +{ + remote_ra_cmd_t *cmd = user_data; + + if (!cmd) { + return; + } + if (cmd->delay_id) { + g_source_remove(cmd->delay_id); + } + if (cmd->interval_id) { + g_source_remove(cmd->interval_id); + } + if (cmd->monitor_timeout_id) { + g_source_remove(cmd->monitor_timeout_id); + } + if (cmd->takeover_timeout_id) { + g_source_remove(cmd->takeover_timeout_id); + } + free(cmd->owner); + free(cmd->rsc_id); + free(cmd->action); + free(cmd->userdata); + pcmk__reset_result(&(cmd->result)); + lrmd_key_value_freeall(cmd->params); + free(cmd); +} + +static int +generate_callid(void) +{ + static int remote_ra_callid = 0; + + remote_ra_callid++; + if (remote_ra_callid <= 0) { + remote_ra_callid = 1; + } + + return remote_ra_callid; +} + +static gboolean +recurring_helper(gpointer data) +{ + remote_ra_cmd_t *cmd = data; + lrm_state_t *connection_rsc = NULL; + + cmd->interval_id = 0; + connection_rsc = lrm_state_find(cmd->rsc_id); + if (connection_rsc && connection_rsc->remote_ra_data) { + remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; + + ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); + + ra_data->cmds = g_list_append(ra_data->cmds, cmd); + mainloop_set_trigger(ra_data->work); + } + return FALSE; +} + +static gboolean +start_delay_helper(gpointer data) +{ + remote_ra_cmd_t *cmd = data; + lrm_state_t *connection_rsc = NULL; + + cmd->delay_id = 0; + connection_rsc = lrm_state_find(cmd->rsc_id); + if (connection_rsc && connection_rsc->remote_ra_data) { + remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; + + mainloop_set_trigger(ra_data->work); + } + return FALSE; +} + +static bool +should_purge_attributes(crm_node_t *node) +{ + bool purge = true; + crm_node_t *conn_node = NULL; + lrm_state_t *connection_rsc = NULL; + + if (!node->conn_host) { + return purge; + } + + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. + */ + conn_node = crm_get_peer(0, node->conn_host); + if (conn_node == NULL) { + return purge; + } + + /* Check the uptime of connection_rsc. If it hasn't been running long + * enough, set purge=true. "Long enough" means it started running earlier + * than the timestamp when we noticed it went away in the first place. + */ + connection_rsc = lrm_state_find(node->uname); + + if (connection_rsc != NULL) { + lrmd_t *lrm = connection_rsc->conn; + time_t uptime = lrmd__uptime(lrm); + time_t now = time(NULL); + + /* Add 20s of fuzziness to give corosync a while to notice the remote + * host is gone. On various error conditions (failure to get uptime, + * peer_lost isn't set) we default to purging. 
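+     *
+     * Editor's note, a worked example: with uptime=60s and a peer noticed
+     * lost 70s ago, 60 + 20 >= 70 holds, so the attributes are kept
+     * (purge stays false); with uptime=40s, 40 + 20 >= 70 fails and the
+     * attributes are purged.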
+ */ + if (uptime > 0 && + conn_node->peer_lost > 0 && + uptime + 20 >= now - conn_node->peer_lost) { + purge = false; + } + } + + return purge; +} + +static enum controld_section_e +section_to_delete(bool purge) +{ + if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + if (purge) { + return controld_section_all_unlocked; + } else { + return controld_section_lrm_unlocked; + } + } else { + if (purge) { + return controld_section_all; + } else { + return controld_section_lrm; + } + } +} + +static void +purge_remote_node_attrs(int call_opt, crm_node_t *node) +{ + bool purge = should_purge_attributes(node); + enum controld_section_e section = section_to_delete(purge); + + /* Purge node from attrd's memory */ + if (purge) { + update_attrd_remote_node_removed(node->uname, NULL); + } + + controld_delete_node_state(node->uname, section, call_opt); +} + +/*! + * \internal + * \brief Handle cluster communication related to pacemaker_remote node joining + * + * \param[in] node_name Name of newly integrated pacemaker_remote node + */ +static void +remote_node_up(const char *node_name) +{ + int call_opt; + xmlNode *update, *state; + crm_node_t *node; + + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing Pacemaker Remote node %s", node_name); + + call_opt = crmd_cib_smart_opt(); + + /* Delete node's probe_complete attribute. This serves two purposes: + * + * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it + * - deleting it (or any attribute for that matter) here ensures the + * attribute manager learns the node is remote + */ + update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); + + /* Ensure node is in the remote peer cache with member status */ + node = crm_remote_peer_get(node_name); + CRM_CHECK(node != NULL, return); + + purge_remote_node_attrs(call_opt, node); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + + /* pacemaker_remote nodes don't participate in the membership layer, + * so cluster nodes don't automatically get notified when they come and go. + * We send a cluster message to the DC, and update the CIB node state entry, + * so the DC will get it sooner (via message) or later (via CIB refresh), + * and any other interested parties can query the CIB. + */ + broadcast_remote_state_message(node_name, true); + + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + state = create_node_state_update(node, node_update_cluster, update, + __func__); + + /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever + * needs to be fenced, this flag will allow various actions to determine + * whether the fencing has happened yet. + */ + crm_xml_add(state, XML_NODE_IS_FENCED, "0"); + + /* TODO: If the remote connection drops, and this (async) CIB update either + * failed or has not yet completed, later actions could mistakenly think the + * node has already been fenced (if the XML_NODE_IS_FENCED attribute was + * previously set, because it won't have been cleared). This could prevent + * actual fencing or allow recurring monitor failures to be cleared too + * soon. Ideally, we wouldn't rely on the CIB for the fenced status. + */ + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL); + free_xml(update); +} + +enum down_opts { + DOWN_KEEP_LRM, + DOWN_ERASE_LRM +}; + +/*! 
+ * \internal + * \brief Handle cluster communication related to pacemaker_remote node leaving + * + * \param[in] node_name Name of lost node + * \param[in] opts Whether to keep or erase LRM history + */ +static void +remote_node_down(const char *node_name, const enum down_opts opts) +{ + xmlNode *update; + int call_opt = crmd_cib_smart_opt(); + crm_node_t *node; + + /* Purge node from attrd's memory */ + update_attrd_remote_node_removed(node_name, NULL); + + /* Normally, only node attributes should be erased, and the resource history + * should be kept until the node comes back up. However, after a successful + * fence, we want to clear the history as well, so we don't think resources + * are still running on the node. + */ + if (opts == DOWN_ERASE_LRM) { + controld_delete_node_state(node_name, controld_section_all, call_opt); + } else { + controld_delete_node_state(node_name, controld_section_attrs, call_opt); + } + + /* Ensure node is in the remote peer cache with lost state */ + node = crm_remote_peer_get(node_name); + CRM_CHECK(node != NULL, return); + pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0); + + /* Notify DC */ + broadcast_remote_state_message(node_name, false); + + /* Update CIB node state */ + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + create_node_state_update(node, node_update_cluster, update, __func__); + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL); + free_xml(update); +} + +/*! + * \internal + * \brief Handle effects of a remote RA command on node state + * + * \param[in] cmd Completed remote RA command + */ +static void +check_remote_node_state(const remote_ra_cmd_t *cmd) +{ + /* Only successful actions can change node state */ + if (!pcmk__result_ok(&(cmd->result))) { + return; + } + + if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + remote_node_up(cmd->rsc_id); + + } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) { + /* After a successful migration, we don't need to do remote_node_up() + * because the DC already knows the node is up, and we don't want to + * clear LRM history etc. We do need to add the remote node to this + * host's remote peer cache, because (unless it happens to be DC) + * it hasn't been tracking the remote node, and other code relies on + * the cache to distinguish remote nodes from unseen cluster nodes. + */ + crm_node_t *node = crm_remote_peer_get(cmd->rsc_id); + + CRM_CHECK(node != NULL, return); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + + } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); + remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; + + if (ra_data) { + if (!pcmk_is_set(ra_data->status, takeover_complete)) { + /* Stop means down if we didn't successfully migrate elsewhere */ + remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); + } else if (AM_I_DC == FALSE) { + /* Only the connection host and DC track node state, + * so if the connection migrated elsewhere and we aren't DC, + * un-cache the node, so we don't have stale info + */ + crm_remote_peer_cache_remove(cmd->rsc_id); + } + } + } + + /* We don't do anything for successful monitors, which is correct for + * routine recurring monitors, and for monitors on nodes where the + * connection isn't supposed to be (the cluster will stop the connection in + * that case). 
However, if the initial probe finds the connection already
+     * active on the node where we want it, we probably should do
+     * remote_node_up(). Unfortunately, we can't distinguish that case here.
+     * Given that connections have to be initiated by the cluster, the chance
+     * of that should be close to zero.
+     */
+}
+
+static void
+report_remote_ra_result(remote_ra_cmd_t * cmd)
+{
+    lrmd_event_data_t op = { 0, };
+
+    check_remote_node_state(cmd);
+
+    op.type = lrmd_event_exec_complete;
+    op.rsc_id = cmd->rsc_id;
+    op.op_type = cmd->action;
+    op.user_data = cmd->userdata;
+    op.timeout = cmd->timeout;
+    op.interval_ms = cmd->interval_ms;
+    op.t_run = (unsigned int) cmd->start_time;
+    op.t_rcchange = (unsigned int) cmd->start_time;
+
+    lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
+                     cmd->result.exit_reason);
+
+    if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
+        op.t_rcchange = (unsigned int) time(NULL);
+        /* This edge case should never occur, but if it does, the result is
+         * that a failure would not be processed correctly. It is only
+         * remotely possible because we can detect that a connection
+         * resource's TCP connection has failed at any moment after start has
+         * completed. The actual recurring operation is just a connectivity
+         * ping.
+         *
+         * Basically, we are not guaranteed that the first successful monitor
+         * op and a subsequent failed monitor op will not occur in the same
+         * timestamp, but we have to make it look like the operations
+         * occurred at separate times. */
+        if (op.t_rcchange == op.t_run) {
+            op.t_rcchange++;
+        }
+    }
+
+    if (cmd->params) {
+        lrmd_key_value_t *tmp;
+
+        op.params = pcmk__strkey_table(free, free);
+        for (tmp = cmd->params; tmp; tmp = tmp->next) {
+            g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
+        }
+
+    }
+    op.call_id = cmd->call_id;
+    op.remote_nodename = cmd->owner;
+
+    lrm_op_callback(&op);
+
+    if (op.params) {
+        g_hash_table_destroy(op.params);
+    }
+    lrmd__reset_result(&op);
+}
+
+static void
+update_remaining_timeout(remote_ra_cmd_t * cmd)
+{
+    cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
+}
+
+static gboolean
+retry_start_cmd_cb(gpointer data)
+{
+    lrm_state_t *lrm_state = data;
+    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+    remote_ra_cmd_t *cmd = NULL;
+    int rc = ETIME;
+
+    if (!ra_data || !ra_data->cur_cmd) {
+        return FALSE;
+    }
+    cmd = ra_data->cur_cmd;
+    if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
+        return FALSE;
+    }
+    update_remaining_timeout(cmd);
+
+    if (cmd->remaining_timeout > 0) {
+        rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
+    } else {
+        pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+                         PCMK_EXEC_TIMEOUT,
+                         "Not enough time remains to retry remote connection");
+    }
+
+    if (rc != pcmk_rc_ok) {
+        report_remote_ra_result(cmd);
+
+        if (ra_data->cmds) {
+            mainloop_set_trigger(ra_data->work);
+        }
+        ra_data->cur_cmd = NULL;
+        free_cmd(cmd);
+    } else {
+        /* wait for connection event */
+    }
+
+    return FALSE;
+}
+
+
+static gboolean
+connection_takeover_timeout_cb(gpointer data)
+{
+    lrm_state_t *lrm_state = NULL;
+    remote_ra_cmd_t *cmd = data;
+
+    crm_info("Takeover event timed out for node %s", cmd->rsc_id);
+    cmd->takeover_timeout_id = 0;
+
+    lrm_state = lrm_state_find(cmd->rsc_id);
+
+    handle_remote_ra_stop(lrm_state, cmd);
+    free_cmd(cmd);
+
+    return FALSE;
+}
+
+static gboolean
+monitor_timeout_cb(gpointer data)
+{
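+    /* A poke for an in-flight monitor went unanswered: fail the command with
+     * PCMK_EXEC_TIMEOUT, release it as the current command so queued work can
+     * proceed, and drop the connection so the next monitor reports it as
+     * inactive.
+     */
+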
lrm_state_t *lrm_state = NULL; + remote_ra_cmd_t *cmd = data; + + lrm_state = lrm_state_find(cmd->rsc_id); + + crm_info("Timed out waiting for remote poke response from %s%s", + cmd->rsc_id, (lrm_state? "" : " (no LRM state)")); + cmd->monitor_timeout_id = 0; + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, + "Remote executor did not respond"); + + if (lrm_state && lrm_state->remote_ra_data) { + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + if (ra_data->cur_cmd == cmd) { + ra_data->cur_cmd = NULL; + } + if (ra_data->cmds) { + mainloop_set_trigger(ra_data->work); + } + } + + report_remote_ra_result(cmd); + free_cmd(cmd); + + if(lrm_state) { + lrm_state_disconnect(lrm_state); + } + return FALSE; +} + +static void +synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type) +{ + lrmd_event_data_t op = { 0, }; + + if (lrm_state == NULL) { + /* if lrm_state not given assume local */ + lrm_state = lrm_state_find(controld_globals.our_nodename); + } + CRM_ASSERT(lrm_state != NULL); + + op.type = lrmd_event_exec_complete; + op.rsc_id = rsc_id; + op.op_type = op_type; + op.t_run = (unsigned int) time(NULL); + op.t_rcchange = op.t_run; + op.call_id = generate_callid(); + lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + process_lrm_event(lrm_state, &op, NULL, NULL); +} + +void +remote_lrm_op_callback(lrmd_event_data_t * op) +{ + gboolean cmd_handled = FALSE; + lrm_state_t *lrm_state = NULL; + remote_ra_data_t *ra_data = NULL; + remote_ra_cmd_t *cmd = NULL; + + crm_debug("Processing '%s%s%s' event on remote connection to %s: %s " + "(%d) status=%s (%d)", + (op->op_type? op->op_type : ""), (op->op_type? " " : ""), + lrmd_event_type2str(op->type), op->remote_nodename, + services_ocf_exitcode_str(op->rc), op->rc, + pcmk_exec_status_str(op->op_status), op->op_status); + + lrm_state = lrm_state_find(op->remote_nodename); + if (!lrm_state || !lrm_state->remote_ra_data) { + crm_debug("No state information found for remote connection event"); + return; + } + ra_data = lrm_state->remote_ra_data; + + if (op->type == lrmd_event_new_client) { + // Another client has connected to the remote daemon + + if (pcmk_is_set(ra_data->status, expect_takeover)) { + // Great, we knew this was coming + lrm_remote_clear_flags(lrm_state, expect_takeover); + lrm_remote_set_flags(lrm_state, takeover_complete); + + } else { + crm_err("Disconnecting from Pacemaker Remote node %s due to " + "unexpected client takeover", op->remote_nodename); + /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */ + /* Do not free lrm_state->conn yet. */ + /* It'll be freed in the following stop action. 
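(handle_remote_ra_stop() for that stop action calls lrm_state_disconnect(), which tears the connection down.)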
*/
+            lrm_state_disconnect_only(lrm_state);
+        }
+        return;
+    }
+
+    /* filter all EXEC events up */
+    if (op->type == lrmd_event_exec_complete) {
+        if (pcmk_is_set(ra_data->status, takeover_complete)) {
+            crm_debug("Ignoring event; this connection was taken over by another node");
+        } else {
+            lrm_op_callback(op);
+        }
+        return;
+    }
+
+    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
+
+        if (!pcmk_is_set(ra_data->status, remote_active)) {
+            crm_debug("Disconnection from Pacemaker Remote node %s complete",
+                      lrm_state->node_name);
+
+        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
+            crm_err("Lost connection to Pacemaker Remote node %s",
+                    lrm_state->node_name);
+            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+
+        } else {
+            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+                       lrm_state->node_name);
+            /* Do roughly what a 'stop' on the remote-resource would do */
+            handle_remote_ra_stop(lrm_state, NULL);
+            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+            /* now fake the reply of a successful 'stop' */
+            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
+        }
+        return;
+    }
+
+    if (!ra_data->cur_cmd) {
+        crm_debug("No event to match");
+        return;
+    }
+
+    cmd = ra_data->cur_cmd;
+
+    /* Start and migrate_from actions complete after the connection
+     * comes back to us. */
+    if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
+                                                               "migrate_from", NULL)) {
+        if (op->connection_rc < 0) {
+            update_remaining_timeout(cmd);
+
+            if ((op->connection_rc == -ENOKEY)
+                || (op->connection_rc == -EKEYREJECTED)) {
+                // Hard error, don't retry
+                pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
+                                 PCMK_EXEC_ERROR,
+                                 pcmk_strerror(op->connection_rc));
+
+            } else if (cmd->remaining_timeout > 3000) {
+                crm_trace("Rescheduling start (remaining timeout %d)", cmd->remaining_timeout);
+                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
+                return;
+
+            } else {
+                crm_trace("Can't reschedule start; remaining timeout too small (%d)",
+                          cmd->remaining_timeout);
+                pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+                                    PCMK_EXEC_TIMEOUT,
+                                    "%s without enough time to retry",
+                                    pcmk_strerror(op->connection_rc));
+            }
+
+        } else {
+            lrm_state_reset_tables(lrm_state, TRUE);
+            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+            lrm_remote_set_flags(lrm_state, remote_active);
+        }
+
+        crm_debug("Remote connection event matched %s action", cmd->action);
+        report_remote_ra_result(cmd);
+        cmd_handled = TRUE;
+
+    } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+
+        if (cmd->monitor_timeout_id) {
+            g_source_remove(cmd->monitor_timeout_id);
+            cmd->monitor_timeout_id = 0;
+        }
+
+        /* Only report success the first time; after that, only worry about
+         * failures. For this function, if we get the poke back, it is always
+         * a success. Pokes only fail if the send fails or the response times
+         * out. */
+        if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
+            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+            report_remote_ra_result(cmd);
+            cmd_set_flags(cmd, cmd_reported_success);
+        }
+
+        crm_debug("Remote poke event matched %s action", cmd->action);
+
+        /* Success: keep rescheduling if an interval is present.
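(The command is parked on ra_data->recurring_cmds, and recurring_helper() moves it back to the work queue when interval_id fires.)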
*/ + if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) { + ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd); + cmd->interval_id = g_timeout_add(cmd->interval_ms, + recurring_helper, cmd); + cmd = NULL; /* prevent free */ + } + cmd_handled = TRUE; + + } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + if (pcmk_is_set(ra_data->status, remote_active) && + !pcmk_is_set(cmd->status, cmd_cancel)) { + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, + "Remote connection unexpectedly dropped " + "during monitor"); + report_remote_ra_result(cmd); + crm_err("Remote connection to %s unexpectedly dropped during monitor", + lrm_state->node_name); + } + cmd_handled = TRUE; + + } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + + handle_remote_ra_stop(lrm_state, cmd); + cmd_handled = TRUE; + + } else { + crm_debug("Event did not match %s action", ra_data->cur_cmd->action); + } + + if (cmd_handled) { + ra_data->cur_cmd = NULL; + if (ra_data->cmds) { + mainloop_set_trigger(ra_data->work); + } + free_cmd(cmd); + } +} + +static void +handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd) +{ + remote_ra_data_t *ra_data = NULL; + + CRM_ASSERT(lrm_state); + ra_data = lrm_state->remote_ra_data; + + if (!pcmk_is_set(ra_data->status, takeover_complete)) { + /* delete pending ops when ever the remote connection is intentionally stopped */ + g_hash_table_remove_all(lrm_state->active_ops); + } else { + /* we no longer hold the history if this connection has been migrated, + * however, we keep metadata cache for future use */ + lrm_state_reset_tables(lrm_state, FALSE); + } + + lrm_remote_clear_flags(lrm_state, remote_active); + lrm_state_disconnect(lrm_state); + + if (ra_data->cmds) { + g_list_free_full(ra_data->cmds, free_cmd); + } + if (ra_data->recurring_cmds) { + g_list_free_full(ra_data->recurring_cmds, free_cmd); + } + ra_data->cmds = NULL; + ra_data->recurring_cmds = NULL; + ra_data->cur_cmd = NULL; + + if (cmd) { + pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + report_remote_ra_result(cmd); + } +} + +// \return Standard Pacemaker return code +static int +handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms) +{ + const char *server = NULL; + lrmd_key_value_t *tmp = NULL; + int port = 0; + int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? 
MAX_START_TIMEOUT_MS : timeout_ms; + int rc = pcmk_rc_ok; + + for (tmp = cmd->params; tmp; tmp = tmp->next) { + if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR, + XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) { + server = tmp->value; + } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) { + port = atoi(tmp->value); + } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) { + lrm_remote_set_flags(lrm_state, controlling_guest); + } + } + + rc = controld_connect_remote_executor(lrm_state, server, port, + timeout_used); + if (rc != pcmk_rc_ok) { + pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, + "Could not connect to Pacemaker Remote node %s: %s", + lrm_state->node_name, pcmk_rc_str(rc)); + } + return rc; +} + +static gboolean +handle_remote_ra_exec(gpointer user_data) +{ + int rc = 0; + lrm_state_t *lrm_state = user_data; + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + remote_ra_cmd_t *cmd; + GList *first = NULL; + + if (ra_data->cur_cmd) { + /* still waiting on previous cmd */ + return TRUE; + } + + while (ra_data->cmds) { + first = ra_data->cmds; + cmd = first->data; + if (cmd->delay_id) { + /* still waiting for start delay timer to trip */ + return TRUE; + } + + ra_data->cmds = g_list_remove_link(ra_data->cmds, first); + g_list_free_1(first); + + if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { + lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete); + if (handle_remote_ra_start(lrm_state, cmd, + cmd->timeout) == pcmk_rc_ok) { + /* take care of this later when we get async connection result */ + crm_debug("Initiated async remote connection, %s action will complete after connect event", + cmd->action); + ra_data->cur_cmd = cmd; + return TRUE; + } + report_remote_ra_result(cmd); + + } else if (!strcmp(cmd->action, "monitor")) { + + if (lrm_state_is_connected(lrm_state) == TRUE) { + rc = lrm_state_poke_connection(lrm_state); + if (rc < 0) { + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, pcmk_strerror(rc)); + } + } else { + rc = -1; + pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING, + PCMK_EXEC_DONE, "Remote connection inactive"); + } + + if (rc == 0) { + crm_debug("Poked Pacemaker Remote at node %s, waiting for async response", + cmd->rsc_id); + ra_data->cur_cmd = cmd; + cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); + return TRUE; + } + report_remote_ra_result(cmd); + + } else if (!strcmp(cmd->action, "stop")) { + + if (pcmk_is_set(ra_data->status, expect_takeover)) { + /* briefly wait on stop for the takeover event to occur. If the + * takeover event does not occur during the wait period, that's fine. + * It just means that the remote-node's lrm_status section is going to get + * cleared which will require all the resources running in the remote-node + * to be explicitly re-detected via probe actions. If the takeover does occur + * successfully, then we can leave the status section intact. 
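(The wait is half the stop action's timeout; if connection_takeover_timeout_cb() fires first, it proceeds with a normal stop.)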
*/ + cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); + ra_data->cur_cmd = cmd; + return TRUE; + } + + handle_remote_ra_stop(lrm_state, cmd); + + } else if (!strcmp(cmd->action, "migrate_to")) { + lrm_remote_clear_flags(lrm_state, takeover_complete); + lrm_remote_set_flags(lrm_state, expect_takeover); + pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + report_remote_ra_result(cmd); + } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD, + CRMD_ACTION_RELOAD_AGENT, NULL)) { + /* Currently the only reloadable parameter is reconnect_interval, + * which is only used by the scheduler via the CIB, so reloads are a + * no-op. + * + * @COMPAT DC <2.1.0: We only need to check for "reload" in case + * we're in a rolling upgrade with a DC scheduling "reload" instead + * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway, + * so this would work for that purpose as well. + */ + pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + report_remote_ra_result(cmd); + } + + free_cmd(cmd); + } + + return TRUE; +} + +static void +remote_ra_data_init(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = NULL; + + if (lrm_state->remote_ra_data) { + return; + } + + ra_data = calloc(1, sizeof(remote_ra_data_t)); + ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); + lrm_state->remote_ra_data = ra_data; +} + +void +remote_ra_cleanup(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + if (!ra_data) { + return; + } + + if (ra_data->cmds) { + g_list_free_full(ra_data->cmds, free_cmd); + } + + if (ra_data->recurring_cmds) { + g_list_free_full(ra_data->recurring_cmds, free_cmd); + } + mainloop_destroy_trigger(ra_data->work); + free(ra_data); + lrm_state->remote_ra_data = NULL; +} + +gboolean +is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) +{ + if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { + return TRUE; + } + if ((id != NULL) && (lrm_state_find(id) != NULL) + && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) { + return TRUE; + } + + return FALSE; +} + +lrmd_rsc_info_t * +remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) +{ + lrmd_rsc_info_t *info = NULL; + + if ((lrm_state_find(rsc_id))) { + info = calloc(1, sizeof(lrmd_rsc_info_t)); + + info->id = strdup(rsc_id); + info->type = strdup(REMOTE_LRMD_RA); + info->standard = strdup(PCMK_RESOURCE_CLASS_OCF); + info->provider = strdup("pacemaker"); + } + + return info; +} + +static gboolean +is_remote_ra_supported_action(const char *action) +{ + return pcmk__str_any_of(action, + CRMD_ACTION_START, + CRMD_ACTION_STOP, + CRMD_ACTION_STATUS, + CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, + CRMD_ACTION_RELOAD_AGENT, + CRMD_ACTION_RELOAD, + NULL); +} + +static GList * +fail_all_monitor_cmds(GList * list) +{ + GList *rm_list = NULL; + remote_ra_cmd_t *cmd = NULL; + GList *gIter = NULL; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + rm_list = g_list_append(rm_list, cmd); + } + } + + for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, "Lost connection to remote executor"); + crm_trace("Pre-emptively failing %s %s (interval=%u, %s)", + cmd->action, cmd->rsc_id, 
cmd->interval_ms, cmd->userdata); + report_remote_ra_result(cmd); + + list = g_list_remove(list, cmd); + free_cmd(cmd); + } + + /* frees only the list data, not the cmds */ + g_list_free(rm_list); + return list; +} + +static GList * +remove_cmd(GList * list, const char *action, guint interval_ms) +{ + remote_ra_cmd_t *cmd = NULL; + GList *gIter = NULL; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) { + break; + } + cmd = NULL; + } + if (cmd) { + list = g_list_remove(list, cmd); + free_cmd(cmd); + } + return list; +} + +int +remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, + const char *action, guint interval_ms) +{ + lrm_state_t *connection_rsc = NULL; + remote_ra_data_t *ra_data = NULL; + + connection_rsc = lrm_state_find(rsc_id); + if (!connection_rsc || !connection_rsc->remote_ra_data) { + return -EINVAL; + } + + ra_data = connection_rsc->remote_ra_data; + ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms); + ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, + interval_ms); + if (ra_data->cur_cmd && + (ra_data->cur_cmd->interval_ms == interval_ms) && + (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) { + + cmd_set_flags(ra_data->cur_cmd, cmd_cancel); + } + + return 0; +} + +static remote_ra_cmd_t * +handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, + const char *userdata) +{ + GList *gIter = NULL; + remote_ra_cmd_t *cmd = NULL; + + /* there are 3 places a potential duplicate monitor operation + * could exist. + * 1. recurring_cmds list. where the op is waiting for its next interval + * 2. cmds list, where the op is queued to get executed immediately + * 3. cur_cmd, which means the monitor op is in flight right now. + */ + if (interval_ms == 0) { + return NULL; + } + + if (ra_data->cur_cmd && + !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) && + (ra_data->cur_cmd->interval_ms == interval_ms) && + pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) { + + cmd = ra_data->cur_cmd; + goto handle_dup; + } + + for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + goto handle_dup; + } + } + + for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { + cmd = gIter->data; + if ((cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + goto handle_dup; + } + } + + return NULL; + +handle_dup: + + crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT, + cmd->rsc_id, "monitor", interval_ms); + + /* update the userdata */ + if (userdata) { + free(cmd->userdata); + cmd->userdata = strdup(userdata); + } + + /* if we've already reported success, generate a new call id */ + if (pcmk_is_set(cmd->status, cmd_reported_success)) { + cmd->start_time = time(NULL); + cmd->call_id = generate_callid(); + cmd_clear_flags(cmd, cmd_reported_success); + } + + /* if we have an interval_id set, that means we are in the process of + * waiting for this cmd's next interval. instead of waiting, cancel + * the timer and execute the action immediately */ + if (cmd->interval_id) { + g_source_remove(cmd->interval_id); + cmd->interval_id = 0; + recurring_helper(cmd); + } + + return cmd; +} + +/*! 
+ * \internal + * \brief Execute an action using the (internal) ocf:pacemaker:remote agent + * + * \param[in] lrm_state Executor state object for remote connection + * \param[in] rsc_id Connection resource ID + * \param[in] action Action to execute + * \param[in] userdata String to copy and pass to execution callback + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in] timeout_ms Action timeout (in milliseconds) + * \param[in] start_delay_ms Delay (in milliseconds) before executing action + * \param[in,out] params Connection resource parameters + * \param[out] call_id Where to store call ID on success + * + * \return Standard Pacemaker return code + * \note This takes ownership of \p params, which should not be used or freed + * after calling this function. + */ +int +controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id, + const char *action, const char *userdata, + guint interval_ms, int timeout_ms, + int start_delay_ms, lrmd_key_value_t *params, + int *call_id) +{ + lrm_state_t *connection_rsc = NULL; + remote_ra_cmd_t *cmd = NULL; + remote_ra_data_t *ra_data = NULL; + + *call_id = 0; + + CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL) + && (userdata != NULL) && (call_id != NULL), + lrmd_key_value_freeall(params); return EINVAL); + + if (!is_remote_ra_supported_action(action)) { + lrmd_key_value_freeall(params); + return EOPNOTSUPP; + } + + connection_rsc = lrm_state_find(rsc_id); + if (connection_rsc == NULL) { + lrmd_key_value_freeall(params); + return ENOTCONN; + } + + remote_ra_data_init(connection_rsc); + ra_data = connection_rsc->remote_ra_data; + + cmd = handle_dup_monitor(ra_data, interval_ms, userdata); + if (cmd) { + *call_id = cmd->call_id; + lrmd_key_value_freeall(params); + return pcmk_rc_ok; + } + + cmd = calloc(1, sizeof(remote_ra_cmd_t)); + if (cmd == NULL) { + lrmd_key_value_freeall(params); + return ENOMEM; + } + + cmd->owner = strdup(lrm_state->node_name); + cmd->rsc_id = strdup(rsc_id); + cmd->action = strdup(action); + cmd->userdata = strdup(userdata); + if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL) + || (cmd->userdata == NULL)) { + free_cmd(cmd); + lrmd_key_value_freeall(params); + return ENOMEM; + } + + cmd->interval_ms = interval_ms; + cmd->timeout = timeout_ms; + cmd->start_delay = start_delay_ms; + cmd->params = params; + cmd->start_time = time(NULL); + + cmd->call_id = generate_callid(); + + if (cmd->start_delay) { + cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); + } + + ra_data->cmds = g_list_append(ra_data->cmds, cmd); + mainloop_set_trigger(ra_data->work); + + *call_id = cmd->call_id; + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Immediately fail all monitors of a remote node, if proxied here + * + * \param[in] node_name Name of pacemaker_remote node + */ +void +remote_ra_fail(const char *node_name) +{ + lrm_state_t *lrm_state = lrm_state_find(node_name); + + if (lrm_state && lrm_state_is_connected(lrm_state)) { + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + + crm_info("Failing monitors on Pacemaker Remote node %s", node_name); + ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); + ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); + } +} + +/* A guest node fencing implied by host fencing looks like: + * + * + * + * + * + * + * + */ +#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \ + "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \ + "/" XML_CIB_TAG_NODE + +/*! 
+ * \internal + * \brief Check a pseudo-action for Pacemaker Remote node side effects + * + * \param[in,out] xml XML of pseudo-action to check + */ +void +remote_ra_process_pseudo(xmlNode *xml) +{ + xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE); + + if (numXpathResults(search) == 1) { + xmlNode *result = getXpathResult(search, 0); + + /* Normally, we handle the necessary side effects of a guest node stop + * action when reporting the remote agent's result. However, if the stop + * is implied due to fencing, it will be a fencing pseudo-event, and + * there won't be a result to report. Handle that case here. + * + * This will result in a duplicate call to remote_node_down() if the + * guest stop was real instead of implied, but that shouldn't hurt. + * + * There is still one corner case that isn't handled: if a guest node + * isn't running any resources when its host is fenced, it will appear + * to be cleanly stopped, so there will be no pseudo-fence, and our + * peer cache state will be incorrect unless and until the guest is + * recovered. + */ + if (result) { + const char *remote = ID(result); + + if (remote) { + remote_node_down(remote, DOWN_ERASE_LRM); + } + } + } + freeXpathObject(search); +} + +static void +remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance) +{ + xmlNode *update, *state; + int call_opt; + crm_node_t *node; + + call_opt = crmd_cib_smart_opt(); + node = crm_remote_peer_get(lrm_state->node_name); + CRM_CHECK(node != NULL, return); + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + state = create_node_state_update(node, node_update_none, update, + __func__); + crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0"); + if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, + NULL) == pcmk_rc_ok) { + /* TODO: still not 100% sure that async update will succeed ... */ + if (maintenance) { + lrm_remote_set_flags(lrm_state, remote_in_maint); + } else { + lrm_remote_clear_flags(lrm_state, remote_in_maint); + } + } + free_xml(update); +} + +#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ + "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \ + XML_GRAPH_TAG_MAINTENANCE + +/*! 
+ * \internal + * \brief Check a pseudo-action holding updates for maintenance state + * + * \param[in,out] xml XML of pseudo-action to check + */ +void +remote_ra_process_maintenance_nodes(xmlNode *xml) +{ + xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE); + + if (numXpathResults(search) == 1) { + xmlNode *node; + int cnt = 0, cnt_remote = 0; + + for (node = + first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE); + node != NULL; node = pcmk__xml_next(node)) { + lrm_state_t *lrm_state = lrm_state_find(ID(node)); + + cnt++; + if (lrm_state && lrm_state->remote_ra_data && + pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) { + int is_maint; + + cnt_remote++; + pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE), + &is_maint, 0); + remote_ra_maintenance(lrm_state, is_maint); + } + } + crm_trace("Action holds %d nodes (%d remotes found) " + "adjusting maintenance-mode", cnt, cnt_remote); + } + freeXpathObject(search); +} + +gboolean +remote_ra_is_in_maintenance(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + return pcmk_is_set(ra_data->status, remote_in_maint); +} + +gboolean +remote_ra_controlling_guest(lrm_state_t * lrm_state) +{ + remote_ra_data_t *ra_data = lrm_state->remote_ra_data; + return pcmk_is_set(ra_data->status, controlling_guest); +} diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c new file mode 100644 index 0000000..912f9a5 --- /dev/null +++ b/daemons/controld/controld_schedulerd.c @@ -0,0 +1,506 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include /* pid_t, sleep, ssize_t */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void handle_disconnect(void); + +static pcmk_ipc_api_t *schedulerd_api = NULL; + +/*! + * \internal + * \brief Close any scheduler connection and free associated memory + */ +void +controld_shutdown_schedulerd_ipc(void) +{ + controld_clear_fsa_input_flags(R_PE_REQUIRED); + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); + + pcmk_free_ipc_api(schedulerd_api); + schedulerd_api = NULL; +} + +/*! + * \internal + * \brief Save CIB query result to file, raising FSA error + * + * \param[in] msg Ignored + * \param[in] call_id Call ID of CIB query + * \param[in] rc Return code of CIB query + * \param[in,out] output Result of CIB query + * \param[in] user_data Unique identifier for filename + * + * \note This is intended to be called after a scheduler connection fails. + */ +static void +save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + const char *id = user_data; + + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + CRM_CHECK(id != NULL, return); + + if (rc == pcmk_ok) { + char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); + + if (write_xml_file(output, filename, TRUE) < 0) { + crm_err("Could not save Cluster Information Base to %s after scheduler crash", + filename); + } else { + crm_notice("Saved Cluster Information Base to %s after scheduler crash", + filename); + } + free(filename); + } +} + +/*! 
+ * \internal + * \brief Respond to scheduler connection failure + */ +static void +handle_disconnect(void) +{ + // If we aren't connected to the scheduler, we can't expect a reply + controld_expect_sched_reply(NULL); + + if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) { + int rc = pcmk_ok; + char *uuid_str = crm_generate_uuid(); + + crm_crit("Connection to the scheduler failed " + CRM_XS " uuid=%s", uuid_str); + + /* + * The scheduler died... + * + * Save the current CIB so that we have a chance of + * figuring out what killed it. + * + * Delay raising the I_ERROR until the query below completes or + * 5s is up, whichever comes first. + * + */ + rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn, + NULL, NULL, + cib_scope_local); + fsa_register_cib_callback(rc, uuid_str, save_cib_contents); + + } else { + crm_info("Connection to the scheduler released"); + } + + controld_clear_fsa_input_flags(R_PE_CONNECTED); + controld_trigger_fsa(); + return; +} + +static void +handle_reply(pcmk_schedulerd_api_reply_t *reply) +{ + const char *msg_ref = NULL; + + if (!AM_I_DC) { + return; + } + + msg_ref = reply->data.graph.reference; + + if (msg_ref == NULL) { + crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); + + } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, + pcmk__str_none)) { + ha_msg_input_t fsa_input; + xmlNode *crm_data_node; + + controld_stop_sched_timer(); + + /* do_te_invoke (which will eventually process the fsa_input we are constructing + * here) requires that fsa_input.xml be non-NULL. That will only happen if + * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the + * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function + * gave us the values we need, we just need to put them into XML. + * + * The name of the top level element here is irrelevant. Nothing checks it. 
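(Concretely, the wrapper built below carries the reply reference and the graph input as attributes, plus a copy of the transition graph in an F_CRM_DATA child.)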
+ */ + fsa_input.msg = create_xml_node(NULL, "dummy-reply"); + crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref); + crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input); + + crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA); + add_node_copy(crm_data_node, reply->data.graph.tgraph); + register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); + + free_xml(fsa_input.msg); + + } else { + crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); + } +} + +static void +scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data, void *user_data) +{ + pcmk_schedulerd_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_disconnect: + handle_disconnect(); + break; + + case pcmk_ipc_event_reply: + handle_reply(reply); + break; + + default: + break; + } +} + +static bool +new_schedulerd_ipc_connection(void) +{ + int rc; + + controld_set_fsa_input_flags(R_PE_REQUIRED); + + if (schedulerd_api == NULL) { + rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); + + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; + } + } + + pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); + + rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; + } + + controld_set_fsa_input_flags(R_PE_CONNECTED); + return true; +} + +static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, + xmlNode *output, void *user_data); + +/* A_PE_START, A_PE_STOP, O_PE_RESTART */ +void +do_pe_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + if (pcmk_is_set(action, A_PE_STOP)) { + controld_clear_fsa_input_flags(R_PE_REQUIRED); + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); + } + if (pcmk_is_set(action, A_PE_START) + && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { + + if (cur_state == S_STOPPING) { + crm_info("Ignoring request to connect to scheduler while shutting down"); + + } else if (!new_schedulerd_ipc_connection()) { + crm_warn("Could not connect to scheduler"); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + } + } +} + +static int fsa_pe_query = 0; +static mainloop_timer_t *controld_sched_timer = NULL; + +// @TODO Make this a configurable cluster option if there's demand for it +#define SCHED_TIMEOUT_MS (120000) + +/*! + * \internal + * \brief Handle a timeout waiting for scheduler reply + * + * \param[in] user_data Ignored + * + * \return FALSE (indicating that timer should not be restarted) + */ +static gboolean +controld_sched_timeout(gpointer user_data) +{ + if (AM_I_DC) { + /* If this node is the DC but can't communicate with the scheduler, just + * exit (and likely get fenced) so this node doesn't interfere with any + * further DC elections. + * + * @TODO We could try something less drastic first, like disconnecting + * and reconnecting to the scheduler, but something is likely going + * seriously wrong, so perhaps it's better to just fail as quickly as + * possible. 
+ */ + crmd_exit(CRM_EX_FATAL); + } + return FALSE; +} + +void +controld_stop_sched_timer(void) +{ + if ((controld_sched_timer != NULL) + && (controld_globals.fsa_pe_ref != NULL)) { + crm_trace("Stopping timer for scheduler reply %s", + controld_globals.fsa_pe_ref); + } + mainloop_timer_stop(controld_sched_timer); +} + +/*! + * \internal + * \brief Set the scheduler request currently being waited on + * + * \param[in] ref Request to expect reply to (or NULL for none) + * + * \note This function takes ownership of \p ref. + */ +void +controld_expect_sched_reply(char *ref) +{ + if (ref) { + if (controld_sched_timer == NULL) { + controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", + SCHED_TIMEOUT_MS, FALSE, + controld_sched_timeout, + NULL); + } + mainloop_timer_start(controld_sched_timer); + } else { + controld_stop_sched_timer(); + } + free(controld_globals.fsa_pe_ref); + controld_globals.fsa_pe_ref = ref; +} + +/*! + * \internal + * \brief Free the scheduler reply timer + */ +void +controld_free_sched_timer(void) +{ + if (controld_sched_timer != NULL) { + mainloop_timer_del(controld_sched_timer); + controld_sched_timer = NULL; + } +} + +/* A_PE_INVOKE */ +void +do_pe_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + if (AM_I_DC == FALSE) { + crm_err("Not invoking scheduler because not DC: %s", + fsa_action2string(action)); + return; + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + crm_err("Cannot shut down gracefully without the scheduler"); + register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); + + } else { + crm_info("Waiting for the scheduler to connect"); + crmd_fsa_stall(FALSE); + controld_set_fsa_action_flags(A_PE_START); + controld_trigger_fsa(); + } + return; + } + + if (cur_state != S_POLICY_ENGINE) { + crm_notice("Not invoking scheduler because in state %s", + fsa_state2string(cur_state)); + return; + } + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { + crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); + + /* start the join from scratch */ + register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); + return; + } + + fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local); + + crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, + fsa_state2string(controld_globals.fsa_state)); + + controld_expect_sched_reply(NULL); + fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback); +} + +static void +force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) +{ + int max = 0; + int lpc = 0; + const char *xpath_base = NULL; + char *xpath_string = NULL; + xmlXPathObjectPtr xpathObj = NULL; + + xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG); + if (xpath_base == NULL) { + crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)"); + return; + } + + xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']", + xpath_base, XML_CIB_TAG_PROPSET, + attr_name); + xpathObj = xpath_search(xml, xpath_string); + max = numXpathResults(xpathObj); + free(xpath_string); + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); + crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); + } + + 
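+    /* No matching nvpair was found anywhere under crm_config, so create one
+     * (and any missing ancestor elements) in the first cluster property set.
+     */
+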
if(max == 0) { + xmlNode *configuration = NULL; + xmlNode *crm_config = NULL; + xmlNode *cluster_property_set = NULL; + + crm_trace("Creating %s-%s for %s=%s", + CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); + + configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL, + NULL); + if (configuration == NULL) { + configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); + } + + crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL, + NULL); + if (crm_config == NULL) { + crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); + } + + cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET, + NULL, NULL); + if (cluster_property_set == NULL) { + cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); + crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); + } + + xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); + + crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); + crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); + crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); + } + freeXpathObject(xpathObj); +} + +static void +do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + char *ref = NULL; + pid_t watchdog = pcmk__locate_sbd(); + + if (rc != pcmk_ok) { + crm_err("Could not retrieve the Cluster Information Base: %s " + CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + return; + + } else if (call_id != fsa_pe_query) { + crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); + return; + + } else if (!AM_I_DC + || !pcmk_is_set(controld_globals.fsa_input_register, + R_PE_CONNECTED)) { + crm_debug("No need to invoke the scheduler anymore"); + return; + + } else if (controld_globals.fsa_state != S_POLICY_ENGINE) { + crm_debug("Discarding scheduler request in state: %s", + fsa_state2string(controld_globals.fsa_state)); + return; + + /* this callback counts as 1 */ + } else if (num_cib_op_callbacks() > 1) { + crm_debug("Re-asking for the CIB: %d other peer updates still pending", + (num_cib_op_callbacks() - 1)); + sleep(1); + controld_set_fsa_action_flags(A_PE_INVOKE); + controld_trigger_fsa(); + return; + } + + CRM_LOG_ASSERT(output != NULL); + + /* Refresh the remote node cache and the known node cache when the + * scheduler is invoked */ + pcmk__refresh_node_caches_from_cib(output); + + crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid); + pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM, + pcmk_is_set(controld_globals.flags, + controld_has_quorum)); + + force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); + + if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum) + && !crm_have_quorum) { + crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); + } + + rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref)); + + if (rc < 0) { + crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); + } else { + CRM_ASSERT(ref != NULL); + controld_expect_sched_reply(ref); + crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, " + "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref, + crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags, + controld_has_quorum))); + } +} diff --git a/daemons/controld/controld_te_actions.c 
b/daemons/controld/controld_te_actions.c new file mode 100644 index 0000000..d8cfcad --- /dev/null +++ b/daemons/controld/controld_te_actions.c @@ -0,0 +1,746 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include // lrmd_event_data_t, lrmd_free_event() +#include +#include +#include + +#include +#include + +static GHashTable *te_targets = NULL; +void send_rsc_command(pcmk__graph_action_t *action); +static void te_update_job_count(pcmk__graph_action_t *action, int offset); + +static void +te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + action->timer = g_timeout_add(action->timeout + graph->network_delay, + action_timer_callback, (void *) action); + CRM_ASSERT(action->timer != 0); +} + +/*! + * \internal + * \brief Execute a graph pseudo-action + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] pseudo Pseudo-action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo) +{ + const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); + + /* send to peers as well? */ + if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { + GHashTableIter iter; + crm_node_t *node = NULL; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + xmlNode *cmd = NULL; + + if (pcmk__str_eq(controld_globals.our_nodename, node->uname, + pcmk__str_casei)) { + continue; + } + + cmd = create_request(task, pseudo->xml, node->uname, + CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); + send_cluster_message(node, crm_msg_crmd, cmd, FALSE); + free_xml(cmd); + } + + remote_ra_process_maintenance_nodes(pseudo->xml); + } else { + /* Check action for Pacemaker Remote node side effects */ + remote_ra_process_pseudo(pseudo->xml); + } + + crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, + crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY)); + te_action_confirmed(pseudo, graph); + return pcmk_rc_ok; +} + +static int +get_target_rc(pcmk__graph_action_t *action) +{ + int exit_status; + + pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC), + &exit_status, 0); + return exit_status; +} + +/*! 
+ * \internal + * \brief Execute a cluster action from a transition graph + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] action Cluster action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + char *counter = NULL; + xmlNode *cmd = NULL; + gboolean is_local = FALSE; + + const char *id = NULL; + const char *task = NULL; + const char *value = NULL; + const char *on_node = NULL; + const char *router_node = NULL; + + gboolean rc = TRUE; + gboolean no_wait = FALSE; + + id = ID(action->xml); + CRM_CHECK(!pcmk__str_empty(id), return EPROTO); + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + CRM_CHECK(!pcmk__str_empty(task), return EPROTO); + + on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown); + + router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router_node == NULL) { + router_node = on_node; + if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); + + if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) { + router_node = controld_globals.our_nodename; + } + } + } + + if (pcmk__str_eq(router_node, controld_globals.our_nodename, + pcmk__str_casei)) { + is_local = TRUE; + } + + value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); + if (crm_is_true(value)) { + no_wait = TRUE; + } + + crm_info("Handling controller request '%s' (%s on %s)%s%s", + id, task, on_node, (is_local? " locally" : ""), + (no_wait? " without waiting" : "")); + + if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + /* defer until everything else completes */ + crm_info("Controller request '%s' is a local shutdown", id); + graph->completion_action = pcmk__graph_shutdown; + graph->abort_reason = "local shutdown"; + te_action_confirmed(action, graph); + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + crm_node_t *peer = crm_get_peer(0, router_node); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } + + cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); + + counter = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, get_target_rc(action), + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + + rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); + + if (rc == FALSE) { + crm_err("Action %d failed: send", action->id); + return ECOMM; + + } else if (no_wait) { + te_action_confirmed(action, graph); + + } else { + if (action->timeout <= 0) { + crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead", + action->id, task, on_node, action->timeout, graph->network_delay); + action->timeout = (int) graph->network_delay; + } + te_start_action_timer(graph, action); + } + + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Synthesize an executor event for a resource action timeout + * + * \param[in] action Resource action that timed out + * \param[in] target_rc Expected result of action that timed out + * + * Synthesize an executor event for a resource action timeout. (If the executor + * gets a timeout while waiting for a resource action to complete, that will be + * reported via the usual callback. 
This timeout means we didn't hear from the + * executor itself or the controller that relayed the action to the executor.) + * + * \return Newly created executor event for result of \p action + * \note The caller is responsible for freeing the return value using + * lrmd_free_event(). + */ +static lrmd_event_data_t * +synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc) +{ + lrmd_event_data_t *op = NULL; + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *reason = NULL; + char *dynamic_reason = NULL; + + if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) { + reason = "Local executor did not return result in time"; + } else { + const char *router_node = NULL; + + router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router_node == NULL) { + router_node = target; + } + dynamic_reason = crm_strdup_printf("Controller on %s did not return " + "result in time", router_node); + reason = dynamic_reason; + } + + op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT, + PCMK_OCF_UNKNOWN_ERROR, reason); + op->call_id = -1; + op->user_data = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, target_rc, + controld_globals.te_uuid); + free(dynamic_reason); + return op; +} + +static void +controld_record_action_event(pcmk__graph_action_t *action, + lrmd_event_data_t *op) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + xmlNode *state = NULL; + xmlNode *rsc = NULL; + xmlNode *action_rsc = NULL; + + int rc = pcmk_ok; + + const char *rsc_id = NULL; + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + + int target_rc = get_target_rc(action); + + action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); + if (action_rsc == NULL) { + return; + } + + rsc_id = ID(action_rsc); + CRM_CHECK(rsc_id != NULL, + crm_log_xml_err(action->xml, "Bad:action"); return); + +/* + update the CIB + + + + + +*/ + + state = create_xml_node(NULL, XML_CIB_TAG_STATE); + + crm_xml_add(state, XML_ATTR_ID, target_uuid); + crm_xml_add(state, XML_ATTR_UNAME, target); + + rsc = create_xml_node(state, XML_CIB_TAG_LRM); + crm_xml_add(rsc, XML_ATTR_ID, target_uuid); + + rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); + rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); + crm_xml_add(rsc, XML_ATTR_ID, rsc_id); + + + crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE); + crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); + crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); + + pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target, + __func__); + + rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state, + cib_scope_local); + fsa_register_cib_callback(rc, NULL, cib_action_updated); + free_xml(state); + + crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)", + rc, action->id, task_uuid, target); + pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); +} + +void +controld_record_action_timeout(pcmk__graph_action_t *action) +{ + lrmd_event_data_t *op = NULL; + + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + + int target_rc = get_target_rc(action); + + crm_warn("%s %d: %s on %s timed out", + 
crm_element_name(action->xml), action->id, task_uuid, target); + + op = synthesize_timeout_event(action, target_rc); + controld_record_action_event(action, op); + lrmd_free_event(op); +} + +/*! + * \internal + * \brief Execute a resource action from a transition graph + * + * \param[in,out] graph Transition graph being executed + * \param[in,out] action Resource action to execute + * + * \return Standard Pacemaker return code + */ +static int +execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + /* never overwrite stop actions in the CIB with + * anything other than completed results + * + * Writing pending stops makes it look like the + * resource is running again + */ + xmlNode *cmd = NULL; + xmlNode *rsc_op = NULL; + + gboolean rc = TRUE; + gboolean no_wait = FALSE; + gboolean is_local = FALSE; + + char *counter = NULL; + const char *task = NULL; + const char *value = NULL; + const char *on_node = NULL; + const char *router_node = NULL; + const char *task_uuid = NULL; + + CRM_ASSERT(action != NULL); + CRM_ASSERT(action->xml != NULL); + + pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed); + on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + + CRM_CHECK(!pcmk__str_empty(on_node), + crm_err("Corrupted command(id=%s) %s: no node", + ID(action->xml), pcmk__s(task, "without task")); + return pcmk_rc_node_unknown); + + rsc_op = action->xml; + task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); + task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); + + if (!router_node) { + router_node = on_node; + } + + counter = pcmk__transition_key(controld_globals.transition_graph->id, + action->id, get_target_rc(action), + controld_globals.te_uuid); + crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); + + if (pcmk__str_eq(router_node, controld_globals.our_nodename, + pcmk__str_casei)) { + is_local = TRUE; + } + + value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); + if (crm_is_true(value)) { + no_wait = TRUE; + } + + crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", + task, task_uuid, (is_local? " locally" : ""), on_node, + (no_wait? " without waiting" : ""), action->id); + + cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, + CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); + + if (is_local) { + /* shortcut local resource commands */ + ha_msg_input_t data = { + .msg = cmd, + .xml = rsc_op, + }; + + fsa_data_t msg = { + .id = 0, + .data = &data, + .data_type = fsa_dt_ha_msg, + .fsa_input = I_NULL, + .fsa_cause = C_FSA_INTERNAL, + .actions = A_LRM_INVOKE, + .origin = __func__, + }; + + do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state, + I_NULL, &msg); + + } else { + rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); + } + + free(counter); + free_xml(cmd); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_executed); + + if (rc == FALSE) { + crm_err("Action %d failed: send", action->id); + return ECOMM; + + } else if (no_wait) { + /* Just mark confirmed. Don't bump the job count only to immediately + * decrement it. 
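(te_update_job_count() is invoked only in the timed branch below, so there is nothing to undo here.)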
+ */ + crm_info("Action %d confirmed - no wait", action->id); + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + pcmk__update_graph(controld_globals.transition_graph, action); + trigger_graph(); + + } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", + action->id, task, task_uuid, on_node, action->timeout); + } else { + if (action->timeout <= 0) { + crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead", + action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); + action->timeout = (int) graph->network_delay; + } + te_update_job_count(action, 1); + te_start_action_timer(graph, action); + } + + return pcmk_rc_ok; +} + +struct te_peer_s +{ + char *name; + int jobs; + int migrate_jobs; +}; + +static void te_peer_free(gpointer p) +{ + struct te_peer_s *peer = p; + + free(peer->name); + free(peer); +} + +void te_reset_job_counts(void) +{ + GHashTableIter iter; + struct te_peer_s *peer = NULL; + + if(te_targets == NULL) { + te_targets = pcmk__strkey_table(NULL, te_peer_free); + } + + g_hash_table_iter_init(&iter, te_targets); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { + peer->jobs = 0; + peer->migrate_jobs = 0; + } +} + +static void +te_update_job_count_on(const char *target, int offset, bool migrate) +{ + struct te_peer_s *r = NULL; + + if(target == NULL || te_targets == NULL) { + return; + } + + r = g_hash_table_lookup(te_targets, target); + if(r == NULL) { + r = calloc(1, sizeof(struct te_peer_s)); + r->name = strdup(target); + g_hash_table_insert(te_targets, r->name, r); + } + + r->jobs += offset; + if(migrate) { + r->migrate_jobs += offset; + } + crm_trace("jobs[%s] = %d", target, r->jobs); +} + +static void +te_update_job_count(pcmk__graph_action_t *action, int offset) +{ + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + + if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) { + /* No limit on these */ + return; + } + + /* if we have a router node, this means the action is performing + * on a remote node. For now, we count all actions occurring on a + * remote node against the job list on the cluster node hosting + * the connection resources */ + target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + + if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + + const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); + const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); + + te_update_job_count_on(t1, offset, TRUE); + te_update_job_count_on(t2, offset, TRUE); + return; + } else if (target == NULL) { + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + } + + te_update_job_count_on(target, offset, FALSE); +} + +/*! 
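+ * \internal
+ * \brief Life cycle of the per-peer job tally above (illustrative sketch)
+ *
+ * execute_rsc_action() increments a node's tally when it starts an action
+ * timer, and te_action_confirmed() decrements it once the result arrives,
+ * freeing a slot for the next action on that node:
+ *
+ * \code
+ * te_reset_job_counts();           // new transition: zero every tally
+ * te_update_job_count(action, 1);  // action dispatched to its node
+ * // ... result arrives and is processed ...
+ * te_update_job_count(action, -1); // done via te_action_confirmed()
+ * \endcode
+ */
+
+/*!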
+ * \internal + * \brief Check whether a graph action is allowed to be executed on a node + * + * \param[in] graph Transition graph being executed + * \param[in] action Graph action being executed + * \param[in] target Name of node where action should be executed + * + * \return true if action is allowed, otherwise false + */ +static bool +allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action, + const char *target) +{ + int limit = 0; + struct te_peer_s *r = NULL; + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + + if(target == NULL) { + /* No limit on these */ + return true; + + } else if(te_targets == NULL) { + return false; + } + + r = g_hash_table_lookup(te_targets, target); + limit = throttle_get_job_limit(target); + + if(r == NULL) { + r = calloc(1, sizeof(struct te_peer_s)); + r->name = strdup(target); + g_hash_table_insert(te_targets, r->name, r); + } + + if(limit <= r->jobs) { + crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", + target, limit, r->jobs, id); + return false; + + } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { + if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { + crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", + target, graph->migration_limit, r->migrate_jobs, id); + return false; + } + } + + crm_trace("Peer %s has not hit their limit yet: current jobs = %d, limit = %d", target, r->jobs, limit); + + return true; +} + +/*! + * \internal + * \brief Check whether a graph action is allowed to be executed + * + * \param[in] graph Transition graph being executed + * \param[in] action Graph action being executed + * + * \return true if action is allowed, otherwise false + */ +static bool +graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *target = NULL; + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + + if (action->type != pcmk__rsc_graph_action) { + /* No limit on these */ + return true; + } + + /* if we have a router node, this means the action is being performed + * on a remote node. For now, we count all actions occurring on a + * remote node against the job list on the cluster node hosting + * the connection resources */ + target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + + if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); + if (!allowed_on_node(graph, action, target)) { + return false; + } + + target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); + + } else if (target == NULL) { + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + } + + return allowed_on_node(graph, action, target); +} + +/*!
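+ * \internal
+ * \brief Worked example of the per-node throttling check above
+ *
+ * Suppose a hypothetical node "node1" has two jobs in flight and
+ * throttle_get_job_limit() currently returns 2 for it; allowed_on_node()
+ * then defers the action:
+ *
+ * \code
+ * limit = throttle_get_job_limit("node1");    // 2
+ * r = g_hash_table_lookup(te_targets, "node1");
+ * if (limit <= r->jobs) {                     // 2 <= 2: over the limit
+ *     return false;                           // defer until a slot frees up
+ * }
+ * \endcode
+ */
+
+/*!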
+ * \brief Confirm a graph action (and optionally update graph) + * + * \param[in,out] action Action to confirm + * \param[in,out] graph Update and trigger this graph (if non-NULL) + */ +void +te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph) +{ + if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + if ((action->type == pcmk__rsc_graph_action) + && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) { + te_update_job_count(action, -1); + } + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + } + if (graph) { + pcmk__update_graph(graph, action); + trigger_graph(); + } +} + +static pcmk__graph_functions_t te_graph_fns = { + execute_pseudo_action, + execute_rsc_action, + execute_cluster_action, + controld_execute_fence_action, + graph_action_allowed, +}; + +/* + * \internal + * \brief Register the transitioner's graph functions with \p libpacemaker + */ +void +controld_register_graph_functions(void) +{ + pcmk__set_graph_functions(&te_graph_fns); +} + +void +notify_crmd(pcmk__graph_t *graph) +{ + const char *type = "unknown"; + enum crmd_fsa_input event = I_NULL; + + crm_debug("Processing transition completion in state %s", + fsa_state2string(controld_globals.fsa_state)); + + CRM_CHECK(graph->complete, graph->complete = true); + + switch (graph->completion_action) { + case pcmk__graph_wait: + type = "stop"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + event = I_TE_SUCCESS; + } + break; + case pcmk__graph_done: + type = "done"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + event = I_TE_SUCCESS; + } + break; + + case pcmk__graph_restart: + type = "restart"; + if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { + if (controld_get_period_transition_timer() > 0) { + controld_stop_transition_timer(); + controld_start_transition_timer(); + } else { + event = I_PE_CALC; + } + + } else if (controld_globals.fsa_state == S_POLICY_ENGINE) { + controld_set_fsa_action_flags(A_PE_INVOKE); + controld_trigger_fsa(); + } + break; + + case pcmk__graph_shutdown: + type = "shutdown"; + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + event = I_STOP; + + } else { + crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); + event = I_TERMINATE; + } + } + + crm_debug("Transition %d status: %s - %s", graph->id, type, + pcmk__s(graph->abort_reason, "unspecified reason")); + + graph->abort_reason = NULL; + graph->completion_action = pcmk__graph_done; + + if (event != I_NULL) { + register_fsa_input(C_FSA_INTERNAL, event, NULL); + } else { + controld_trigger_fsa(); + } +} diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c new file mode 100644 index 0000000..cf9de83 --- /dev/null +++ b/daemons/controld/controld_te_callbacks.c @@ -0,0 +1,689 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include + +#include +#include +#include +#include +#include /* For ONLINESTATUS etc */ + +#include + +void te_update_confirm(const char *event, xmlNode * msg); + +#define RSC_OP_PREFIX "//" XML_TAG_DIFF_ADDED "//" XML_TAG_CIB \ + "//" XML_LRM_TAG_RSC_OP "[@" XML_ATTR_ID "='" + +// An explicit shutdown-lock of 0 means the lock has been cleared +static bool +shutdown_lock_cleared(xmlNode *lrm_resource) +{ + time_t shutdown_lock = 0; + + return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &shutdown_lock) == pcmk_ok) + && (shutdown_lock == 0); +} + +static void +te_update_diff_v1(const char *event, xmlNode *diff) +{ + int lpc, max; + xmlXPathObject *xpathObj = NULL; + GString *rsc_op_xpath = NULL; + + CRM_CHECK(diff != NULL, return); + + pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); + controld_globals.logger_out->message(controld_globals.logger_out, + "xml-patchset", diff); + + if (cib__config_changed_v1(NULL, NULL, &diff)) { + abort_transition(INFINITY, pcmk__graph_restart, "Non-status change", + diff); + goto bail; /* configuration changed */ + } + + /* Tickets Attributes - Added/Updated */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute: update", aborted); + goto bail; + + } + freeXpathObject(xpathObj); + + /* Tickets Attributes - Removed */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute: removal", aborted); + goto bail; + } + freeXpathObject(xpathObj); + + /* Transient Attributes - Removed */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" + XML_TAG_TRANSIENT_NODEATTRS); + if (numXpathResults(xpathObj) > 0) { + xmlNode *aborted = getXpathResult(xpathObj, 0); + + abort_transition(INFINITY, pcmk__graph_restart, + "Transient attribute: removal", aborted); + goto bail; + + } + freeXpathObject(xpathObj); + + // Check for lrm_resource entries + xpathObj = xpath_search(diff, + "//" F_CIB_UPDATE_RESULT + "//" XML_TAG_DIFF_ADDED + "//" XML_LRM_TAG_RESOURCE); + max = numXpathResults(xpathObj); + + /* + * Updates by, or in response to, graph actions will never affect more than + * one resource at a time, so such updates indicate an LRM refresh. In that + * case, start a new transition rather than check each result individually, + * which can result in _huge_ speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. 
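+ *
+ * For example, a node rejoining the cluster syncs its entire resource
+ * history in one update, producing a diff that adds many lrm_resource
+ * entries at once, whereas a graph-driven result adds exactly one.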
+ */ + if ((controld_globals.transition_graph->pending == 0) && (max > 1)) { + crm_debug("Ignoring resource operation updates due to history refresh of %d resources", + max); + crm_log_xml_trace(diff, "lrm-refresh"); + abort_transition(INFINITY, pcmk__graph_restart, "History refresh", + NULL); + goto bail; + } + + if (max == 1) { + xmlNode *lrm_resource = getXpathResult(xpathObj, 0); + + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, pcmk__graph_restart, + "Shutdown lock cleared", lrm_resource); + // Still process results, so we stop timers and update failcounts + } + } + freeXpathObject(xpathObj); + + /* Process operation updates */ + xpathObj = + xpath_search(diff, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); + max = numXpathResults(xpathObj); + if (max > 0) { + int lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *rsc_op = getXpathResult(xpathObj, lpc); + const char *node = get_node_id(rsc_op); + + process_graph_event(rsc_op, node); + } + } + freeXpathObject(xpathObj); + + /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ + xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); + max = numXpathResults(xpathObj); + for (lpc = 0; lpc < max; lpc++) { + const char *op_id = NULL; + xmlXPathObject *op_match = NULL; + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match == NULL) { continue; }; + + op_id = ID(match); + + if (rsc_op_xpath == NULL) { + rsc_op_xpath = g_string_new(RSC_OP_PREFIX); + } else { + g_string_truncate(rsc_op_xpath, sizeof(RSC_OP_PREFIX) - 1); + } + pcmk__g_strcat(rsc_op_xpath, op_id, "']", NULL); + + op_match = xpath_search(diff, (const char *) rsc_op_xpath->str); + if (numXpathResults(op_match) == 0) { + /* Prevent false positives by matching cancelations too */ + const char *node = get_node_id(match); + pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node); + + if (cancelled == NULL) { + crm_debug("No match for deleted action %s (%s on %s)", + (const char *) rsc_op_xpath->str, op_id, node); + abort_transition(INFINITY, pcmk__graph_restart, + "Resource op removal", match); + freeXpathObject(op_match); + goto bail; + + } else { + crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", + op_id, node, cancelled->id); + } + } + + freeXpathObject(op_match); + } + + bail: + freeXpathObject(xpathObj); + if (rsc_op_xpath != NULL) { + g_string_free(rsc_op_xpath, TRUE); + } +} + +static void +process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) +{ + for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; + rsc_op = pcmk__xml_next(rsc_op)) { + process_graph_event(rsc_op, node); + } + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared", + lrm_resource); + } +} + +static void +process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, + const char *op, const char *xpath) +{ + xmlNode *rsc = NULL; + + if (xml == NULL) { + return; + } + + if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) { + xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); + CRM_CHECK(xml != NULL, return); + } + + CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return); + + /* + * Updates by, or in response to, TE actions will never contain updates + * for more than one 
resource at a time, so such updates indicate an + * LRM refresh. + * + * In that case, start a new transition rather than check each result + * individually, which can result in _huge_ speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. + */ + if ((controld_globals.transition_graph->pending == 0) + && (xml->children != NULL) && (xml->children->next != NULL)) { + + crm_log_xml_trace(change, "lrm-refresh"); + abort_transition(INFINITY, pcmk__graph_restart, "History refresh", + NULL); + return; + } + + for (rsc = pcmk__xml_first_child(xml); rsc != NULL; + rsc = pcmk__xml_next(rsc)) { + crm_trace("Processing %s", ID(rsc)); + process_lrm_resource_diff(rsc, node); + } +} + +static char *extract_node_uuid(const char *xpath) +{ + char *mutable_path = strdup(xpath); + char *node_uuid = NULL; + char *search = NULL; + char *match = NULL; + + match = strstr(mutable_path, "node_state[@" XML_ATTR_ID "=\'"); + if (match == NULL) { + free(mutable_path); + return NULL; + } + match += strlen("node_state[@" XML_ATTR_ID "=\'"); + + search = strchr(match, '\''); + if (search == NULL) { + free(mutable_path); + return NULL; + } + search[0] = 0; + + node_uuid = strdup(match); + free(mutable_path); + return node_uuid; +} + +static void +abort_unless_down(const char *xpath, const char *op, xmlNode *change, + const char *reason) +{ + char *node_uuid = NULL; + pcmk__graph_action_t *down = NULL; + + if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + return; + } + + node_uuid = extract_node_uuid(xpath); + if(node_uuid == NULL) { + crm_err("Could not extract node ID from %s", xpath); + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + return; + } + + down = match_down_event(node_uuid); + if (down == NULL) { + crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); + abort_transition(INFINITY, pcmk__graph_restart, reason, change); + } else { + crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); + } + free(node_uuid); +} + +static void +process_op_deletion(const char *xpath, xmlNode *change) +{ + char *mutable_key = strdup(xpath); + char *key; + char *node_uuid; + + // Extract the part of xpath between last pair of single quotes + key = strrchr(mutable_key, '\''); + if (key != NULL) { + *key = '\0'; + key = strrchr(mutable_key, '\''); + } + if (key == NULL) { + crm_warn("Ignoring malformed CIB update (resource deletion of %s)", + xpath); + free(mutable_key); + return; + } + ++key; + + node_uuid = extract_node_uuid(xpath); + if (confirm_cancel_action(key, node_uuid) == FALSE) { + abort_transition(INFINITY, pcmk__graph_restart, + "Resource operation removal", change); + } + free(mutable_key); + free(node_uuid); +} + +static void +process_delete_diff(const char *xpath, const char *op, xmlNode *change) +{ + if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { + process_op_deletion(xpath, change); + + } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { + abort_unless_down(xpath, op, change, "Resource state removal"); + + } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { + abort_unless_down(xpath, op, change, "Node state removal"); + + } else { + crm_trace("Ignoring delete of %s", xpath); + } +} + +static void +process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, + const char *xpath) +{ + xmlNode *lrm = 
first_named_child(state, XML_CIB_TAG_LRM); + + process_resource_updates(ID(state), lrm, change, op, xpath); +} + +static void +process_status_diff(xmlNode *status, xmlNode *change, const char *op, + const char *xpath) +{ + for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; + state = pcmk__xml_next(state)) { + process_node_state_diff(state, change, op, xpath); + } +} + +static void +process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, + const char *xpath) +{ + xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); + xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); + + if (status) { + process_status_diff(status, change, op, xpath); + } + if (config) { + abort_transition(INFINITY, pcmk__graph_restart, + "Non-status-only change", change); + } +} + +static void +te_update_diff_v2(xmlNode *diff) +{ + crm_log_xml_trace(diff, "Patch:Raw"); + + for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; + change = pcmk__xml_next(change)) { + + xmlNode *match = NULL; + const char *name = NULL; + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + // Possible ops: create, modify, delete, move + const char *op = crm_element_value(change, XML_DIFF_OP); + + // Ignore uninteresting updates + if (op == NULL) { + continue; + + } else if (xpath == NULL) { + crm_trace("Ignoring %s change for version field", op); + continue; + + } else if ((strcmp(op, "move") == 0) + && (strstr(xpath, + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION + "/" XML_CIB_TAG_RESOURCES) == NULL)) { + /* We still need to consider moves within the resources section, + * since they affect placement order. + */ + crm_trace("Ignoring move change at %s", xpath); + continue; + } + + // Find the result of create/modify ops + if (strcmp(op, "create") == 0) { + match = change->children; + + } else if (strcmp(op, "modify") == 0) { + match = first_named_child(change, XML_DIFF_RESULT); + if(match) { + match = match->children; + } + + } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) { + crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", + op, xpath); + continue; + } + + if (match) { + if (match->type == XML_COMMENT_NODE) { + crm_trace("Ignoring %s operation for comment at %s", op, xpath); + continue; + } + name = (const char *)match->name; + } + + crm_trace("Handling %s operation for %s%s%s", + op, (xpath? xpath : "CIB"), + (name? " matched by " : ""), (name? 
name : "")); + + if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { + abort_transition(INFINITY, pcmk__graph_restart, + "Configuration change", change); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) + || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { + abort_transition(INFINITY, pcmk__graph_restart, + "Ticket attribute change", change); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") + || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, + pcmk__str_none)) { + abort_unless_down(xpath, op, change, "Transient attribute change"); + break; // Won't be packaged with operation results we may be waiting for + + } else if (strcmp(op, "delete") == 0) { + process_delete_diff(xpath, op, change); + + } else if (name == NULL) { + crm_warn("Ignoring malformed CIB update (%s at %s has no result)", + op, xpath); + + } else if (strcmp(name, XML_TAG_CIB) == 0) { + process_cib_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { + process_status_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { + process_node_state_diff(match, change, op, xpath); + + } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { + process_resource_updates(ID(match), match, change, op, xpath); + + } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_resource_updates(local_node, match, change, op, xpath); + free(local_node); + + } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_lrm_resource_diff(match, local_node); + free(local_node); + + } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { + char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + + process_graph_event(match, local_node); + free(local_node); + + } else { + crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", + op, xpath, name); + } + } +} + +void +te_update_diff(const char *event, xmlNode * msg) +{ + xmlNode *diff = NULL; + const char *op = NULL; + int rc = -EINVAL; + int format = 1; + int p_add[] = { 0, 0, 0 }; + int p_del[] = { 0, 0, 0 }; + + CRM_CHECK(msg != NULL, return); + crm_element_value_int(msg, F_CIB_RC, &rc); + + if (controld_globals.transition_graph == NULL) { + crm_trace("No graph"); + return; + + } else if (rc < pcmk_ok) { + crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); + return; + + } else if (controld_globals.transition_graph->complete + && (controld_globals.fsa_state != S_IDLE) + && (controld_globals.fsa_state != S_TRANSITION_ENGINE) + && (controld_globals.fsa_state != S_POLICY_ENGINE)) { + crm_trace("Filter state=%s (complete)", + fsa_state2string(controld_globals.fsa_state)); + return; + } + + op = crm_element_value(msg, F_CIB_OPERATION); + diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + xml_patch_versions(diff, p_add, p_del); + crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, + p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], + fsa_state2string(controld_globals.fsa_state)); + + crm_element_value_int(diff, "format", &format); + switch (format) { + case 1: + te_update_diff_v1(event, diff); + break; + case 2: + te_update_diff_v2(diff); + break; + default: + crm_warn("Ignoring malformed CIB update (unknown patch format %d)", + format); + } + controld_remove_all_outside_events(); +} 
+ +void +process_te_message(xmlNode * msg, xmlNode * xml_data) +{ + const char *value = NULL; + xmlXPathObject *xpathObj = NULL; + int nmatches = 0; + + CRM_CHECK(msg != NULL, return); + + // Transition requests must specify transition engine as subsystem + value = crm_element_value(msg, F_CRM_SYS_TO); + if (pcmk__str_empty(value) + || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + crm_info("Received invalid transition request: subsystem '%s' not '" + CRM_SYSTEM_TENGINE "'", pcmk__s(value, "")); + return; + } + + // Only the lrm_invoke command is supported as a transition request + value = crm_element_value(msg, F_CRM_TASK); + if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) { + crm_info("Received invalid transition request: command '%s' not '" + CRM_OP_INVOKE_LRM "'", pcmk__s(value, "")); + return; + } + + // Transition requests must be marked as coming from the executor + value = crm_element_value(msg, F_CRM_SYS_FROM); + if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) { + crm_info("Received invalid transition request: from '%s' not '" + CRM_SYSTEM_LRMD "'", pcmk__s(value, "")); + return; + } + + crm_debug("Processing transition request with ref='%s' origin='%s'", + pcmk__s(crm_element_value(msg, F_CRM_REFERENCE), ""), + pcmk__s(crm_element_value(msg, F_ORIG), "")); + + xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); + nmatches = numXpathResults(xpathObj); + if (nmatches == 0) { + crm_err("Received transition request with no results (bug?)"); + } else { + for (int lpc = 0; lpc < nmatches; lpc++) { + xmlNode *rsc_op = getXpathResult(xpathObj, lpc); + const char *node = get_node_id(rsc_op); + + process_graph_event(rsc_op, node); + } + } + freeXpathObject(xpathObj); +} + +void +cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + if (rc < pcmk_ok) { + crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); + } +} + +/*! + * \brief Handle a timeout in node-to-node communication + * + * \param[in,out] data Pointer to graph action + * + * \return FALSE (indicating that source should not be re-added) + */ +gboolean +action_timer_callback(gpointer data) +{ + pcmk__graph_action_t *action = (pcmk__graph_action_t *) data; + const char *task = NULL; + const char *on_node = NULL; + const char *via_node = NULL; + + CRM_CHECK(data != NULL, return FALSE); + + stop_te_timer(action); + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + via_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + + if (controld_globals.transition_graph->complete) { + crm_notice("Node %s did not send %s result (via %s) within %dms " + "(ignoring because transition not in progress)", + (on_node? on_node : ""), (task? task : "unknown action"), + (via_node? via_node : "controller"), action->timeout); + } else { + /* fail the action */ + + crm_err("Node %s did not send %s result (via %s) within %dms " + "(action timeout plus cluster-delay)", + (on_node? on_node : ""), (task? task : "unknown action"), + (via_node?
via_node : "controller"), + (action->timeout + + controld_globals.transition_graph->network_delay)); + pcmk__log_graph_action(LOG_ERR, action); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + + te_action_confirmed(action, controld_globals.transition_graph); + abort_transition(INFINITY, pcmk__graph_restart, "Action lost", NULL); + + // Record timeout in the CIB if appropriate + if ((action->type == pcmk__rsc_graph_action) + && controld_action_is_recordable(task)) { + controld_record_action_timeout(action); + } + } + + return FALSE; +} diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c new file mode 100644 index 0000000..d4e2b0f --- /dev/null +++ b/daemons/controld/controld_te_events.c @@ -0,0 +1,601 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +/*! + * \internal + * \brief Action numbers of outside events processed in current update diff + * + * This table is to be used as a set. It should be empty when the transitioner + * begins processing a CIB update diff. It ensures that if there are multiple + * events (for example, "_last_0" and "_last_failure_0") for the same action, + * only one of them updates the failcount. Events that originate outside the + * cluster can't be confirmed, since they're not in the transition graph. + */ +static GHashTable *outside_events = NULL; + +/*! + * \internal + * \brief Empty the hash table containing action numbers of outside events + */ +void +controld_remove_all_outside_events(void) +{ + if (outside_events != NULL) { + g_hash_table_remove_all(outside_events); + } +} + +/*! + * \internal + * \brief Destroy the hash table containing action numbers of outside events + */ +void +controld_destroy_outside_events_table(void) +{ + if (outside_events != NULL) { + g_hash_table_destroy(outside_events); + outside_events = NULL; + } +} + +/*! + * \internal + * \brief Add an outside event's action number to a set + * + * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the + * event was not already in the set, or \p pcmk_rc_already otherwise. 
+ */ +static int +record_outside_event(gint action_num) +{ + if (outside_events == NULL) { + outside_events = g_hash_table_new(NULL, NULL); + } + + if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) { + return pcmk_rc_ok; + } + return pcmk_rc_already; +} + +gboolean +fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) +{ + const char *target_uuid = NULL; + const char *router = NULL; + const char *router_uuid = NULL; + xmlNode *last_action = NULL; + + GList *gIter = NULL; + GList *gIter2 = NULL; + + if (graph == NULL || graph->complete) { + return FALSE; + } + + gIter = graph->synapses; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; + + if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) { + /* We've already been here */ + continue; + } + + gIter2 = synapse->actions; + for (; gIter2 != NULL; gIter2 = gIter2->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; + + if ((action->type == pcmk__pseudo_graph_action) + || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + continue; + } else if (action->type == pcmk__cluster_graph_action) { + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + + if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + continue; + } + } + + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { + crm_node_t *node = crm_get_peer(0, router); + if (node) { + router_uuid = node->uuid; + } + } + + if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) { + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + pcmk__set_synapse_flags(synapse, pcmk__synapse_failed); + last_action = action->xml; + stop_te_timer(action); + pcmk__update_graph(graph, action); + + if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + crm_notice("Action %d (%s) was pending on %s (offline)", + action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); + } else { + crm_info("Action %d (%s) is scheduled for %s (offline)", + action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); + } + } + } + } + + if (last_action != NULL) { + crm_info("Node %s shutdown resulted in un-runnable actions", down_node); + abort_transition(INFINITY, pcmk__graph_restart, "Node failure", + last_action); + return TRUE; + } + + return FALSE; +} + +/*! 
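+ * \internal
+ * \brief Usage sketch for the sweep above
+ *
+ * When a node is lost, the caller passes the downed node's UUID (the value
+ * below is hypothetical); a TRUE return means the failed actions have
+ * already triggered a transition abort:
+ *
+ * \code
+ * if (fail_incompletable_actions(controld_globals.transition_graph,
+ *                                "3232238081")) {
+ *     // un-runnable actions were failed and the transition aborted
+ * }
+ * \endcode
+ */
+
+/*!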
+ * \internal + * \brief Update failure-related node attributes if warranted + * + * \param[in] event XML describing operation that (maybe) failed + * \param[in] event_node_uuid Node that event occurred on + * \param[in] rc Actual operation return code + * \param[in] target_rc Expected operation return code + * \param[in] do_update If TRUE, do update regardless of operation type + * \param[in] ignore_failures If TRUE, update last failure but not fail count + * + * \return TRUE if this was not a direct nack, success or lrm status refresh + */ +static gboolean +update_failcount(const xmlNode *event, const char *event_node_uuid, int rc, + int target_rc, gboolean do_update, gboolean ignore_failures) +{ + guint interval_ms = 0; + + char *task = NULL; + char *rsc_id = NULL; + + const char *value = NULL; + const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); + const char *on_uname = crm_peer_uname(event_node_uuid); + const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); + + // Nothing needs to be done for success or status refresh + if (rc == target_rc) { + return FALSE; + } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) { + crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", + id, rc, on_uname); + return FALSE; + } + + /* Sanity check */ + CRM_CHECK(on_uname != NULL, return TRUE); + CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms), + crm_err("Couldn't parse: %s", ID(event)); goto bail); + + /* Decide whether update is necessary and what value to use */ + if ((interval_ms > 0) + || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none) + || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) { + do_update = TRUE; + + } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) { + do_update = TRUE; + value = pcmk__s(controld_globals.transition_graph->failed_start_offset, + CRM_INFINITY_S); + + } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) { + do_update = TRUE; + value = pcmk__s(controld_globals.transition_graph->failed_stop_offset, + CRM_INFINITY_S); + } + + if (do_update) { + pcmk__attrd_query_pair_t *fail_pair = NULL; + pcmk__attrd_query_pair_t *last_pair = NULL; + char *fail_name = NULL; + char *last_name = NULL; + GList *attrs = NULL; + + uint32_t opts = pcmk__node_attr_none; + + char *now = pcmk__ttoa(time(NULL)); + + // Fail count will be either incremented or set to infinity + if (!pcmk_str_is_infinity(value)) { + value = XML_NVPAIR_ATTR_VALUE "++"; + } + + if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) { + opts |= pcmk__node_attr_remote; + } + + crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)", + (ignore_failures? "last failure" : "failcount"), + rsc_id, on_uname, task, rc, value, now); + + /* Update the fail count, if we're not ignoring failures */ + if (!ignore_failures) { + fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t)); + CRM_ASSERT(fail_pair != NULL); + + fail_name = pcmk__failcount_name(rsc_id, task, interval_ms); + fail_pair->name = fail_name; + fail_pair->value = value; + fail_pair->node = on_uname; + + attrs = g_list_prepend(attrs, fail_pair); + } + + /* Update the last failure time (even if we're ignoring failures, + * so that failure can still be detected and shown, e.g. 
by crm_mon) + */ + last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t)); + CRM_ASSERT(last_pair != NULL); + + last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms); + last_pair->name = last_name; + last_pair->value = now; + last_pair->node = on_uname; + + attrs = g_list_prepend(attrs, last_pair); + + update_attrd_list(attrs, opts); + + free(fail_name); + free(fail_pair); + + free(last_name); + free(last_pair); + g_list_free(attrs); + + free(now); + } + + bail: + free(rsc_id); + free(task); + return TRUE; +} + +pcmk__graph_action_t * +controld_get_action(int id) +{ + for (GList *item = controld_globals.transition_graph->synapses; + item != NULL; item = item->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data; + + for (GList *item2 = synapse->actions; item2; item2 = item2->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data; + + if (action->id == id) { + return action; + } + } + } + return NULL; +} + +pcmk__graph_action_t * +get_cancel_action(const char *id, const char *node) +{ + GList *gIter = NULL; + GList *gIter2 = NULL; + + gIter = controld_globals.transition_graph->synapses; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; + + gIter2 = synapse->actions; + for (; gIter2 != NULL; gIter2 = gIter2->next) { + const char *task = NULL; + const char *target = NULL; + pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) { + continue; + } + + task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + if (!pcmk__str_eq(task, id, pcmk__str_casei)) { + crm_trace("Wrong key %s for %s on %s", task, id, node); + continue; + } + + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) { + crm_trace("Wrong node %s for %s on %s", target, id, node); + continue; + } + + crm_trace("Found %s on %s", id, node); + return action; + } + } + + return NULL; +} + +bool +confirm_cancel_action(const char *id, const char *node_id) +{ + const char *op_key = NULL; + const char *node_name = NULL; + pcmk__graph_action_t *cancel = get_cancel_action(id, node_id); + + if (cancel == NULL) { + return FALSE; + } + op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY); + node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET); + + stop_te_timer(cancel); + te_action_confirmed(cancel, controld_globals.transition_graph); + + crm_info("Cancellation of %s on %s confirmed (action %d)", + op_key, node_name, cancel->id); + return TRUE; +} + +/* downed nodes are listed like: ... */ +#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \ + "/" XML_CIB_TAG_NODE "[@" XML_ATTR_ID "='%s']" + +/*! 
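+ * \internal
+ * \brief Expansion of the XPATH_DOWNED search above
+ *
+ * With the usual values of the XML tag constants, the string built from
+ * XPATH_DOWNED for an example node UUID of 1084752129 reads:
+ *
+ * \code
+ * //downed/node[@id='1084752129']
+ * \endcode
+ */
+
+/*!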
+ * \brief Find a transition event that would have made a specified node down + * + * \param[in] target UUID of node to match + * + * \return Matching event if found, NULL otherwise + */ +pcmk__graph_action_t * +match_down_event(const char *target) +{ + pcmk__graph_action_t *match = NULL; + xmlXPathObjectPtr xpath_ret = NULL; + GList *gIter, *gIter2; + + char *xpath = crm_strdup_printf(XPATH_DOWNED, target); + + for (gIter = controld_globals.transition_graph->synapses; + gIter != NULL && match == NULL; + gIter = gIter->next) { + + for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions; + gIter2 != NULL && match == NULL; + gIter2 = gIter2->next) { + + match = (pcmk__graph_action_t *) gIter2->data; + if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) { + xpath_ret = xpath_search(match->xml, xpath); + if (numXpathResults(xpath_ret) < 1) { + match = NULL; + } + freeXpathObject(xpath_ret); + } else { + // Only actions that were actually started can match + match = NULL; + } + } + } + + free(xpath); + + if (match != NULL) { + crm_debug("Shutdown action %d (%s) found for node %s", match->id, + crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); + } else { + crm_debug("No reason to expect node %s to be down", target); + } + return match; +} + +void +process_graph_event(xmlNode *event, const char *event_node) +{ + int rc = -1; // Actual result + int target_rc = -1; // Expected result + int status = -1; // Executor status + int callid = -1; // Executor call ID + int transition_num = -1; // Transition number + int action_num = -1; // Action number within transition + char *update_te_uuid = NULL; + bool ignore_failures = FALSE; + const char *id = NULL; + const char *desc = NULL; + const char *magic = NULL; + const char *uname = NULL; + + CRM_ASSERT(event != NULL); + +/* + +*/ + + magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); + if (magic == NULL) { + /* non-change */ + return; + } + + crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); + if (status == PCMK_EXEC_PENDING) { + return; + } + + id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); + crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); + crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); + + rc = pcmk__effective_rc(rc); + + if (decode_transition_key(magic, &update_te_uuid, &transition_num, + &action_num, &target_rc) == FALSE) { + // decode_transition_key() already logged the bad key + crm_err("Can't process action %s result: Incompatible versions? " + CRM_XS " call-id=%d", id, callid); + abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event); + return; + } + + if (transition_num == -1) { + // E.g. 
crm_resource --fail + if (record_outside_event(action_num) != pcmk_rc_ok) { + crm_debug("Outside event with transition key '%s' has already been " + "processed", magic); + goto bail; + } + desc = "initiated outside of the cluster"; + abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event", + event); + + } else if ((action_num < 0) + || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid, + pcmk__str_none)) { + desc = "initiated by a different DC"; + abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event); + + } else if ((controld_globals.transition_graph->id != transition_num) + || controld_globals.transition_graph->complete) { + + // Action is not from currently active transition + + guint interval_ms = 0; + + if (parse_op_key(id, NULL, NULL, &interval_ms) + && (interval_ms != 0)) { + /* Recurring actions have the transition number they were first + * scheduled in. + */ + + if (status == PCMK_EXEC_CANCELLED) { + confirm_cancel_action(id, get_node_id(event)); + goto bail; + } + + desc = "arrived after initial scheduling"; + abort_transition(INFINITY, pcmk__graph_restart, + "Change in recurring result", event); + + } else if (controld_globals.transition_graph->id != transition_num) { + desc = "arrived really late"; + abort_transition(INFINITY, pcmk__graph_restart, "Old event", event); + } else { + desc = "arrived late"; + abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph", + event); + } + + } else { + // Event is result of an action from currently active transition + pcmk__graph_action_t *action = controld_get_action(action_num); + + if (action == NULL) { + // Should never happen + desc = "unknown"; + abort_transition(INFINITY, pcmk__graph_restart, "Unknown event", + event); + + } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { + /* Nothing further needs to be done if the action has already been + * confirmed. This can happen e.g. when processing both an + * "xxx_last_0" or "xxx_last_failure_0" record as well as the main + * history record, which would otherwise result in incorrectly + * bumping the fail count twice. + */ + crm_log_xml_debug(event, "Event already confirmed:"); + goto bail; + + } else { + /* An action result needs to be confirmed. + * (This is the only case where desc == NULL.) 
+ */ + + if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) { + ignore_failures = TRUE; + + } else if (rc != target_rc) { + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + } + + stop_te_timer(action); + te_action_confirmed(action, controld_globals.transition_graph); + + if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) { + abort_transition(action->synapse->priority + 1, + pcmk__graph_restart, "Event failed", event); + } + } + } + + if (id == NULL) { + id = "unknown action"; + } + uname = crm_element_value(event, XML_LRM_ATTR_TARGET); + if (uname == NULL) { + uname = "unknown node"; + } + + if (status == PCMK_EXEC_INVALID) { + // We couldn't attempt the action + crm_info("Transition %d action %d (%s on %s): %s", + transition_num, action_num, id, uname, + pcmk_exec_status_str(status)); + + } else if (desc && update_failcount(event, event_node, rc, target_rc, + (transition_num == -1), FALSE)) { + crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " + CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(target_rc), + services_ocf_exitcode_str(rc), + target_rc, rc, callid, desc); + + } else if (desc) { + crm_info("Transition %d action %d (%s on %s): %s " + CRM_XS " rc=%d target-rc=%d call-id=%d", + transition_num, action_num, id, uname, + desc, rc, target_rc, callid); + + } else if (rc == target_rc) { + crm_info("Transition %d action %d (%s on %s) confirmed: %s " + CRM_XS " rc=%d call-id=%d", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(rc), rc, callid); + + } else { + update_failcount(event, event_node, rc, target_rc, + (transition_num == -1), ignore_failures); + crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " + CRM_XS " target-rc=%d rc=%d call-id=%d", + transition_num, action_num, id, uname, + services_ocf_exitcode_str(target_rc), + services_ocf_exitcode_str(rc), + target_rc, rc, callid); + } + + bail: + free(update_te_uuid); +} diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c new file mode 100644 index 0000000..ecbc0b2 --- /dev/null +++ b/daemons/controld/controld_te_utils.c @@ -0,0 +1,367 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include +#include +#include + +#include + +//! 
Triggers transition graph processing +static crm_trigger_t *transition_trigger = NULL; + +gboolean +stop_te_timer(pcmk__graph_action_t *action) +{ + if (action == NULL) { + return FALSE; + } + if (action->timer != 0) { + crm_trace("Stopping action timer"); + g_source_remove(action->timer); + action->timer = 0; + } else { + crm_trace("Action timer was already stopped"); + return FALSE; + } + return TRUE; +} + +static gboolean +te_graph_trigger(gpointer user_data) +{ + if (controld_globals.transition_graph == NULL) { + crm_debug("Nothing to do"); + return TRUE; + } + + crm_trace("Invoking graph %d in state %s", + controld_globals.transition_graph->id, + fsa_state2string(controld_globals.fsa_state)); + + switch (controld_globals.fsa_state) { + case S_STARTING: + case S_PENDING: + case S_NOT_DC: + case S_HALT: + case S_ILLEGAL: + case S_STOPPING: + case S_TERMINATE: + return TRUE; + default: + break; + } + + if (!controld_globals.transition_graph->complete) { + enum pcmk__graph_status graph_rc; + int orig_limit = controld_globals.transition_graph->batch_limit; + int throttled_limit = throttle_get_total_job_limit(orig_limit); + + controld_globals.transition_graph->batch_limit = throttled_limit; + graph_rc = pcmk__execute_graph(controld_globals.transition_graph); + controld_globals.transition_graph->batch_limit = orig_limit; + + if (graph_rc == pcmk__graph_active) { + crm_trace("Transition not yet complete"); + return TRUE; + + } else if (graph_rc == pcmk__graph_pending) { + crm_trace("Transition not yet complete - no actions fired"); + return TRUE; + } + + if (graph_rc != pcmk__graph_complete) { + crm_warn("Transition failed: %s", + pcmk__graph_status2text(graph_rc)); + pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph); + } + } + + crm_debug("Transition %d is now complete", + controld_globals.transition_graph->id); + controld_globals.transition_graph->complete = true; + notify_crmd(controld_globals.transition_graph); + + return TRUE; +} + +/*! + * \internal + * \brief Initialize transition trigger + */ +void +controld_init_transition_trigger(void) +{ + transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, + NULL); +} + +/*! + * \internal + * \brief Destroy transition trigger + */ +void +controld_destroy_transition_trigger(void) +{ + mainloop_destroy_trigger(transition_trigger); + transition_trigger = NULL; +} + +void +controld_trigger_graph_as(const char *fn, int line) +{ + crm_trace("%s:%d - Triggered graph processing", fn, line); + mainloop_set_trigger(transition_trigger); +} + +static struct abort_timer_s { + bool aborted; + guint id; + int priority; + enum pcmk__graph_next action; + const char *text; +} abort_timer = { 0, }; + +static gboolean +abort_timer_popped(gpointer data) +{ + if (AM_I_DC && (abort_timer.aborted == FALSE)) { + abort_transition(abort_timer.priority, abort_timer.action, + abort_timer.text, NULL); + } + abort_timer.id = 0; + return FALSE; // do not immediately reschedule timer +} + +/*! 
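+ * \internal
+ * \brief Usage sketch for the delayed abort below
+ *
+ * A caller that wants to give in-flight results a grace period before
+ * forcing a new scheduler run might request (values are illustrative):
+ *
+ * \code
+ * abort_after_delay(INFINITY, pcmk__graph_restart,
+ *                   "Peer cancellation", 5000);  // delay in ms
+ * \endcode
+ */
+
+/*!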
+ * \internal + * \brief Abort transition after delay, if not already aborted in that time + * + * \param[in] abort_text Must be literal string + */ +void +abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, guint delay_ms) +{ + if (abort_timer.id) { + // Timer already in progress, stop and reschedule + g_source_remove(abort_timer.id); + } + abort_timer.aborted = FALSE; + abort_timer.priority = abort_priority; + abort_timer.action = abort_action; + abort_timer.text = abort_text; + abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); +} + +static const char * +abort2text(enum pcmk__graph_next abort_action) +{ + switch (abort_action) { + case pcmk__graph_done: return "done"; + case pcmk__graph_wait: return "stop"; + case pcmk__graph_restart: return "restart"; + case pcmk__graph_shutdown: return "shutdown"; + } + return "unknown"; +} + +static bool +update_abort_priority(pcmk__graph_t *graph, int priority, + enum pcmk__graph_next action, const char *abort_reason) +{ + bool change = FALSE; + + if (graph == NULL) { + return change; + } + + if (graph->abort_priority < priority) { + crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority); + graph->abort_priority = priority; + if (graph->abort_reason != NULL) { + crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason); + } + graph->abort_reason = abort_reason; + change = TRUE; + } + + if (graph->completion_action < action) { + crm_debug("Abort action %s superseded by %s: %s", + abort2text(graph->completion_action), abort2text(action), abort_reason); + graph->completion_action = action; + change = TRUE; + } + + return change; +} + +void +abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, const xmlNode *reason, + const char *fn, int line) +{ + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + int level = LOG_INFO; + const xmlNode *diff = NULL; + const xmlNode *change = NULL; + + CRM_CHECK(controld_globals.transition_graph != NULL, return); + + switch (controld_globals.fsa_state) { + case S_STARTING: + case S_PENDING: + case S_NOT_DC: + case S_HALT: + case S_ILLEGAL: + case S_STOPPING: + case S_TERMINATE: + crm_info("Abort %s suppressed: state=%s (%scomplete)", + abort_text, fsa_state2string(controld_globals.fsa_state), + (controld_globals.transition_graph->complete? 
"" : "in")); + return; + default: + break; + } + + abort_timer.aborted = TRUE; + controld_expect_sched_reply(NULL); + + if (!controld_globals.transition_graph->complete + && update_abort_priority(controld_globals.transition_graph, + abort_priority, abort_action, + abort_text)) { + level = LOG_NOTICE; + } + + if (reason != NULL) { + const xmlNode *search = NULL; + + for(search = reason; search; search = search->parent) { + if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) { + diff = search; + break; + } + } + + if(diff) { + xml_patch_versions(diff, add, del); + for(search = reason; search; search = search->parent) { + if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) { + change = search; + break; + } + } + } + } + + if (reason == NULL) { + do_crm_log(level, + "Transition %d aborted: %s " CRM_XS " source=%s:%d " + "complete=%s", controld_globals.transition_graph->id, + abort_text, fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if(change == NULL) { + GString *local_path = pcmk__element_xpath(reason); + CRM_ASSERT(local_path != NULL); + + do_crm_log(level, "Transition %d aborted by %s.%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, TYPE(reason), + ID(reason), abort_text, add[0], add[1], add[2], fn, line, + (const char *) local_path->str, + pcmk__btoa(controld_globals.transition_graph->complete)); + g_string_free(local_path, TRUE); + + } else { + const char *kind = NULL; + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *path = crm_element_value(change, XML_DIFF_PATH); + + if(change == reason) { + if(strcmp(op, "create") == 0) { + reason = reason->children; + + } else if(strcmp(op, "modify") == 0) { + reason = first_named_child(reason, XML_DIFF_RESULT); + if(reason) { + reason = reason->children; + } + } + } + + kind = TYPE(reason); + if(strcmp(op, "delete") == 0) { + const char *shortpath = strrchr(path, '/'); + + do_crm_log(level, "Transition %d aborted by deletion of %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + (shortpath? 
(shortpath + 1) : path), abort_text, + add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_none)) { + do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + crm_element_value(reason, XML_ATTR_ID), op, + crm_element_value(reason, XML_NVPAIR_ATTR_NAME), + crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), + abort_text, add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_none)) { + const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); + + do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s " + CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s", + controld_globals.transition_graph->id, + crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, + crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, + magic, add[0], add[1], add[2], fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else if (pcmk__str_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) { + const char *uname = crm_peer_uname(ID(reason)); + + do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", + controld_globals.transition_graph->id, + kind, op, (uname? uname : ID(reason)), abort_text, + add[0], add[1], add[2], fn, line, + pcmk__btoa(controld_globals.transition_graph->complete)); + + } else { + const char *id = ID(reason); + + do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + controld_globals.transition_graph->id, + TYPE(reason), (id? id : ""), (op? op : "change"), + abort_text, add[0], add[1], add[2], fn, line, path, + pcmk__btoa(controld_globals.transition_graph->complete)); + } + } + + if (controld_globals.transition_graph->complete) { + if (controld_get_period_transition_timer() > 0) { + controld_stop_transition_timer(); + controld_start_transition_timer(); + } else { + register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); + } + return; + } + + trigger_graph(); +} diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c new file mode 100644 index 0000000..5b7f9c0 --- /dev/null +++ b/daemons/controld/controld_throttle.c @@ -0,0 +1,574 @@ +/* + * Copyright 2013-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +/* These values don't need to be bits, but these particular values must be kept + * for backward compatibility during rolling upgrades. 
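+ * (Each node reports its current mode to its peers in throttle messages,
+ * so renumbered values would be misinterpreted by older nodes in a
+ * mixed-version cluster.)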
+ */ +enum throttle_state_e { + throttle_none = 0x0000, + throttle_low = 0x0001, + throttle_med = 0x0010, + throttle_high = 0x0100, + throttle_extreme = 0x1000, +}; + +struct throttle_record_s { + int max; + enum throttle_state_e mode; + char *node; +}; + +static int throttle_job_max = 0; +static float throttle_load_target = 0.0; + +#define THROTTLE_FACTOR_LOW 1.2 +#define THROTTLE_FACTOR_MEDIUM 1.6 +#define THROTTLE_FACTOR_HIGH 2.0 + +static GHashTable *throttle_records = NULL; +static mainloop_timer_t *throttle_timer = NULL; + +static const char * +load2str(enum throttle_state_e mode) +{ + switch (mode) { + case throttle_extreme: return "extreme"; + case throttle_high: return "high"; + case throttle_med: return "medium"; + case throttle_low: return "low"; + case throttle_none: return "negligible"; + default: return "undetermined"; + } +} + +#if HAVE_LINUX_PROCFS +/*! + * \internal + * \brief Return name of /proc file containing the CIB daemon's load statistics + * + * \return Newly allocated memory with file name on success, NULL otherwise + * + * \note It is the caller's responsibility to free the return value. + * This will return NULL if the daemon is being run via valgrind. + * This should be called only on Linux systems. + */ +static char * +find_cib_loadfile(void) +{ + pid_t pid = pcmk__procfs_pid_of("pacemaker-based"); + + return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL; +} + +static bool +throttle_cib_load(float *load) +{ +/* + /proc/[pid]/stat + Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c. + + The fields, in order, with their proper scanf(3) format specifiers, are: + + pid %d (1) The process ID. + + comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out. + + state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging. + + ppid %d (4) The PID of the parent. + + pgrp %d (5) The process group ID of the process. + + session %d (6) The session ID of the process. + + tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.) + + tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process. + + flags %u (%lu before Linux 2.6.22) + (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version. + + minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk. + + cminflt %lu (11) The number of minor faults that the process's waited-for children have made. + + majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk. + + cmajflt %lu (13) The number of major faults that the process's waited-for children have made. + + utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations. 
+ + stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). + */ + + static char *loadfile = NULL; + static time_t last_call = 0; + static long ticks_per_s = 0; + static unsigned long last_utime, last_stime; + + char buffer[64*1024]; + FILE *stream = NULL; + time_t now = time(NULL); + + if(load == NULL) { + return FALSE; + } else { + *load = 0.0; + } + + if(loadfile == NULL) { + last_call = 0; + last_utime = 0; + last_stime = 0; + loadfile = find_cib_loadfile(); + if (loadfile == NULL) { + crm_warn("Couldn't find CIB load file"); + return FALSE; + } + ticks_per_s = sysconf(_SC_CLK_TCK); + crm_trace("Found %s", loadfile); + } + + stream = fopen(loadfile, "r"); + if(stream == NULL) { + int rc = errno; + + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + free(loadfile); loadfile = NULL; + return FALSE; + } + + if(fgets(buffer, sizeof(buffer), stream)) { + char *comm = calloc(1, 256); + char state = 0; + int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0; + unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0; + + rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", + &pid, comm, &state, + &ppid, &pgrp, &session, &tty_nr, &tpgid, + &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime); + free(comm); + + if(rc != 15) { + crm_err("Only %d of 15 fields found in %s", rc, loadfile); + fclose(stream); + return FALSE; + + } else if(last_call > 0 + && last_call < now + && last_utime <= utime + && last_stime <= stime) { + + time_t elapsed = now - last_call; + unsigned long delta_utime = utime - last_utime; + unsigned long delta_stime = stime - last_stime; + + *load = (delta_utime + delta_stime); /* Cast to a float before division */ + *load /= ticks_per_s; + *load /= elapsed; + crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed); + + } else { + crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s); + } + + last_call = now; + last_utime = utime; + last_stime = stime; + + fclose(stream); + return TRUE; + } + + fclose(stream); + return FALSE; +} + +static bool +throttle_load_avg(float *load) +{ + char buffer[256]; + FILE *stream = NULL; + const char *loadfile = "/proc/loadavg"; + + if(load == NULL) { + return FALSE; + } + + stream = fopen(loadfile, "r"); + if(stream == NULL) { + int rc = errno; + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + return FALSE; + } + + if(fgets(buffer, sizeof(buffer), stream)) { + char *nl = strstr(buffer, "\n"); + + /* Grab the 1-minute average, ignore the rest */ + *load = strtof(buffer, NULL); + if(nl) { nl[0] = 0; } + + fclose(stream); + return TRUE; + } + + fclose(stream); + return FALSE; +} + +/*! 
+ * \internal + * \brief Check a load value against throttling thresholds + * + * \param[in] load Load value to check + * \param[in] desc Description of metric (for logging) + * \param[in] thresholds Low/medium/high/extreme thresholds + * + * \return Throttle mode corresponding to load value + */ +static enum throttle_state_e +throttle_check_thresholds(float load, const char *desc, + const float thresholds[4]) +{ + if (load > thresholds[3]) { + crm_notice("Extreme %s detected: %f", desc, load); + return throttle_extreme; + + } else if (load > thresholds[2]) { + crm_notice("High %s detected: %f", desc, load); + return throttle_high; + + } else if (load > thresholds[1]) { + crm_info("Moderate %s detected: %f", desc, load); + return throttle_med; + + } else if (load > thresholds[0]) { + crm_debug("Noticeable %s detected: %f", desc, load); + return throttle_low; + } + + crm_trace("Negligible %s detected: %f", desc, load); + return throttle_none; +} + +static enum throttle_state_e +throttle_handle_load(float load, const char *desc, int cores) +{ + float normalize; + float thresholds[4]; + + if (cores == 1) { + /* On a single core machine, a load of 1.0 is already too high */ + normalize = 0.6; + + } else { + /* Normalize the load to be per-core */ + normalize = cores; + } + thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; + thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; + thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; + thresholds[3] = load + 1.0; /* never extreme */ + + return throttle_check_thresholds(load, desc, thresholds); +} +#endif // HAVE_LINUX_PROCFS + +static enum throttle_state_e +throttle_mode(void) +{ + enum throttle_state_e mode = throttle_none; + +#if HAVE_LINUX_PROCFS + unsigned int cores; + float load; + float thresholds[4]; + + cores = pcmk__procfs_num_cores(); + if(throttle_cib_load(&load)) { + float cib_max_cpu = 0.95; + + /* The CIB is a single-threaded task and thus cannot consume + * more than 100% of a CPU (and 1/cores of the overall system + * load). + * + * On a many-cored system, the CIB might therefore be maxed out + * (causing operations to fail or appear to fail) even though + * the overall system load is still reasonable. + * + * Therefore, the 'normal' thresholds can not apply here, and we + * need a special case. 
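+ * + * (Editor's illustration, hypothetical numbers) With throttle_load_target + * at 0.8 on a multi-core host, cib_max_cpu below drops from 0.95 to 0.8, + * giving thresholds of 0.64 (low), 0.72 (medium), 0.8 (high) and 1.2 + * (extreme) for the CIB's share of one CPU.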
+ */ + if(cores == 1) { + cib_max_cpu = 0.4; + } + if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) { + cib_max_cpu = throttle_load_target; + } + + thresholds[0] = cib_max_cpu * 0.8; + thresholds[1] = cib_max_cpu * 0.9; + thresholds[2] = cib_max_cpu; + /* Can only happen on machines with a low number of cores */ + thresholds[3] = cib_max_cpu * 1.5; + + mode = throttle_check_thresholds(load, "CIB load", thresholds); + } + + if(throttle_load_target <= 0) { + /* If we ever make this a valid value, the cluster will at least behave as expected */ + return mode; + } + + if(throttle_load_avg(&load)) { + enum throttle_state_e cpu_load; + + cpu_load = throttle_handle_load(load, "CPU load", cores); + if (cpu_load > mode) { + mode = cpu_load; + } + crm_debug("Current load is %f across %u core(s)", load, cores); + } +#endif // HAVE_LINUX_PROCFS + return mode; +} + +static void +throttle_send_command(enum throttle_state_e mode) +{ + xmlNode *xml = NULL; + static enum throttle_state_e last = -1; + + if(mode != last) { + crm_info("New throttle mode: %s load (was %s)", + load2str(mode), load2str(last)); + last = mode; + + xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode); + crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max); + + send_cluster_message(NULL, crm_msg_crmd, xml, TRUE); + free_xml(xml); + } +} + +static gboolean +throttle_timer_cb(gpointer data) +{ + throttle_send_command(throttle_mode()); + return TRUE; +} + +static void +throttle_record_free(gpointer p) +{ + struct throttle_record_s *r = p; + free(r->node); + free(r); +} + +static void +throttle_set_load_target(float target) +{ + throttle_load_target = target; +} + +/*! + * \internal + * \brief Update the maximum number of simultaneous jobs + * + * \param[in] preference Cluster-wide node-action-limit from the CIB + */ +static void +throttle_update_job_max(const char *preference) +{ + long long max = 0LL; + const char *env_limit = getenv("PCMK_node_action_limit"); + + if (env_limit != NULL) { + preference = env_limit; // Per-node override + } + if (preference != NULL) { + pcmk__scan_ll(preference, &max, 0LL); + } + if (max > 0) { + throttle_job_max = (int) max; + } else { + // Default is based on the number of cores detected + throttle_job_max = 2 * pcmk__procfs_num_cores(); + } +} + +void +throttle_init(void) +{ + if(throttle_records == NULL) { + throttle_records = pcmk__strkey_table(NULL, throttle_record_free); + throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); + } + + throttle_update_job_max(NULL); + mainloop_timer_start(throttle_timer); +} + +/*! 
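+ * (Editor's note, illustrative) Two values are configured here: a + * load-threshold such as "80%" is parsed by strtof() in + * controld_configure_throttle() below, which stops at the '%' and yields + * 80.0 / 100.0 = 0.8 as the target, while node-action-limit is resolved in + * throttle_update_job_max() above, with the PCMK_node_action_limit + * environment variable overriding the CIB value and 2 * cores as the + * fallback.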
+ * \internal + * \brief Configure throttle options based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_throttle(GHashTable *options) +{ + const char *value = g_hash_table_lookup(options, "load-threshold"); + + if (value != NULL) { + throttle_set_load_target(strtof(value, NULL) / 100.0); + } + + value = g_hash_table_lookup(options, "node-action-limit"); + throttle_update_job_max(value); +} + +void +throttle_fini(void) +{ + if (throttle_timer != NULL) { + mainloop_timer_del(throttle_timer); + throttle_timer = NULL; + } + if (throttle_records != NULL) { + g_hash_table_destroy(throttle_records); + throttle_records = NULL; + } +} + +int +throttle_get_total_job_limit(int l) +{ + /* Cluster-wide limit */ + GHashTableIter iter; + int limit = l; + int peers = crm_active_peers(); + struct throttle_record_s *r = NULL; + + g_hash_table_iter_init(&iter, throttle_records); + + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { + switch(r->mode) { + + case throttle_extreme: + if(limit == 0 || limit > peers/4) { + limit = QB_MAX(1, peers/4); + } + break; + + case throttle_high: + if(limit == 0 || limit > peers/2) { + limit = QB_MAX(1, peers/2); + } + break; + default: + break; + } + } + if(limit == l) { + /* crm_trace("No change to batch-limit=%d", limit); */ + + } else if(l == 0) { + crm_trace("Using batch-limit=%d", limit); + + } else { + crm_trace("Using batch-limit=%d instead of %d", limit, l); + } + return limit; +} + +int +throttle_get_job_limit(const char *node) +{ + int jobs = 1; + struct throttle_record_s *r = NULL; + + r = g_hash_table_lookup(throttle_records, node); + if(r == NULL) { + r = calloc(1, sizeof(struct throttle_record_s)); + r->node = strdup(node); + r->mode = throttle_low; + r->max = throttle_job_max; + crm_trace("Defaulting to local values for unknown node %s", node); + + g_hash_table_insert(throttle_records, r->node, r); + } + + switch(r->mode) { + case throttle_extreme: + case throttle_high: + jobs = 1; /* At least one job must always be allowed */ + break; + case throttle_med: + jobs = QB_MAX(1, r->max / 4); + break; + case throttle_low: + jobs = QB_MAX(1, r->max / 2); + break; + case throttle_none: + jobs = QB_MAX(1, r->max); + break; + default: + crm_err("Unknown throttle mode %.4x on %s", r->mode, node); + break; + } + return jobs; +} + +void +throttle_update(xmlNode *xml) +{ + int max = 0; + int mode = 0; + struct throttle_record_s *r = NULL; + const char *from = crm_element_value(xml, F_CRM_HOST_FROM); + + crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode); + crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max); + + r = g_hash_table_lookup(throttle_records, from); + + if(r == NULL) { + r = calloc(1, sizeof(struct throttle_record_s)); + r->node = strdup(from); + g_hash_table_insert(throttle_records, r->node, r); + } + + r->max = max; + r->mode = (enum throttle_state_e) mode; + + crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", + from, load2str((enum throttle_state_e) mode), max, + throttle_get_job_limit(from)); +} diff --git a/daemons/controld/controld_throttle.h b/daemons/controld/controld_throttle.h new file mode 100644 index 0000000..a798c6c --- /dev/null +++ b/daemons/controld/controld_throttle.h @@ -0,0 +1,16 @@ +/* + * Copyright 2013-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +void throttle_init(void); +void throttle_fini(void); +void controld_configure_throttle(GHashTable *options); + +void throttle_update(xmlNode *xml); +int throttle_get_job_limit(const char *node); +int throttle_get_total_job_limit(int l); diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c new file mode 100644 index 0000000..a65bef5 --- /dev/null +++ b/daemons/controld/controld_timers.c @@ -0,0 +1,509 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include + +//! FSA mainloop timer type +typedef struct fsa_timer_s { + guint source_id; //!< Timer source ID + guint period_ms; //!< Timer period + enum crmd_fsa_input fsa_input; //!< Input to register if timer pops + gboolean (*callback) (gpointer data); //!< What to do if timer pops + bool log_error; //!< Timer popping indicates error + int counter; //!< For detecting loops +} fsa_timer_t; + +//! Wait before retrying a failed CIB or executor connection +static fsa_timer_t *wait_timer = NULL; + +//! Periodically re-run scheduler (for date_spec evaluation and as a failsafe) +static fsa_timer_t *recheck_timer = NULL; + +//! Wait at start-up, or after an election, for DC to make contact +static fsa_timer_t *election_timer = NULL; + +//! Delay start of new transition with expectation something else might happen +static fsa_timer_t *transition_timer = NULL; + +//! join-integration-timeout +static fsa_timer_t *integration_timer = NULL; + +//! join-finalization-timeout +static fsa_timer_t *finalization_timer = NULL; + +// Wait for DC to stop all resources and give us the all-clear to shut down +fsa_timer_t *shutdown_escalation_timer = NULL; + +//! Cluster recheck interval (from configuration) +static guint recheck_interval_ms = 0; + +static const char * +get_timer_desc(fsa_timer_t * timer) +{ + if (timer == election_timer) { + return "Election Trigger"; + + } else if (timer == shutdown_escalation_timer) { + return "Shutdown Escalation"; + + } else if (timer == integration_timer) { + return "Integration Timer"; + + } else if (timer == finalization_timer) { + return "Finalization Timer"; + + } else if (timer == transition_timer) { + return "New Transition Timer"; + + } else if (timer == wait_timer) { + return "Wait Timer"; + + } else if (timer == recheck_timer) { + return "Cluster Recheck Timer"; + + } + return "Unknown Timer"; +} + +/*! + * \internal + * \brief Stop an FSA timer + * + * \param[in,out] timer Timer to stop + * + * \return true if the timer was running, or false otherwise + */ +static bool +controld_stop_timer(fsa_timer_t *timer) +{ + CRM_CHECK(timer != NULL, return false); + + if (timer->source_id != 0) { + crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + g_source_remove(timer->source_id); + timer->source_id = 0; + + } else { + crm_trace("%s already stopped (would inject %s if popped after %ums)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms); + return false; + } + return true; +} + +/*! 
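+ * (Editor's note) The timers below are effectively one-shot: the popped + * callback returns TRUE, which would normally re-arm a g_timeout_add() + * source, but crm_timer_popped() calls controld_stop_timer() on its own + * timer first, so each pop removes the source and the timer only fires + * again after controld_start_timer() is called anew. + *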
+ * \internal + * \brief Start an FSA timer + * + * \param[in,out] timer Timer to start + */ +static void +controld_start_timer(fsa_timer_t *timer) +{ + if (timer->source_id == 0 && timer->period_ms > 0) { + timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer); + CRM_ASSERT(timer->source_id != 0); + crm_debug("Started %s (inject %s if pops after %ums, source=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + } else { + crm_debug("%s already running (inject %s if pops after %ums, source=%d)", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms, timer->source_id); + } +} + +/* A_DC_TIMER_STOP, A_DC_TIMER_START, + * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START + * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START + */ +void +do_timer_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + gboolean timer_op_ok = TRUE; + + if (action & A_DC_TIMER_STOP) { + timer_op_ok = controld_stop_timer(election_timer); + + } else if (action & A_FINALIZE_TIMER_STOP) { + timer_op_ok = controld_stop_timer(finalization_timer); + + } else if (action & A_INTEGRATE_TIMER_STOP) { + timer_op_ok = controld_stop_timer(integration_timer); + } + + /* don't start a timer that wasn't already running */ + if (action & A_DC_TIMER_START && timer_op_ok) { + controld_start_timer(election_timer); + if (AM_I_DC) { + /* there can be only one */ + register_fsa_input(cause, I_ELECTION, NULL); + } + + } else if (action & A_FINALIZE_TIMER_START) { + controld_start_timer(finalization_timer); + + } else if (action & A_INTEGRATE_TIMER_START) { + controld_start_timer(integration_timer); + } +} + +static gboolean +crm_timer_popped(gpointer data) +{ + fsa_timer_t *timer = (fsa_timer_t *) data; + + if (timer->log_error) { + crm_err("%s just popped in state %s! 
" CRM_XS " input=%s time=%ums", + get_timer_desc(timer), + fsa_state2string(controld_globals.fsa_state), + fsa_input2string(timer->fsa_input), timer->period_ms); + } else { + crm_info("%s just popped " CRM_XS " input=%s time=%ums", + get_timer_desc(timer), fsa_input2string(timer->fsa_input), + timer->period_ms); + timer->counter++; + } + + if ((timer == election_timer) && (election_timer->counter > 5)) { + crm_notice("We appear to be in an election loop, something may be wrong"); + crm_write_blackbox(0, NULL); + election_timer->counter = 0; + } + + controld_stop_timer(timer); // Make timer _not_ go off again + + if (timer->fsa_input == I_INTEGRATED) { + crm_info("Welcomed: %d, Integrated: %d", + crmd_join_phase_count(crm_join_welcomed), + crmd_join_phase_count(crm_join_integrated)); + if (crmd_join_phase_count(crm_join_welcomed) == 0) { + // If we don't even have ourselves, start again + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, + __func__); + + } else { + register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); + } + + } else if ((timer == recheck_timer) + && (controld_globals.fsa_state != S_IDLE)) { + crm_debug("Discarding %s event in state: %s", + fsa_input2string(timer->fsa_input), + fsa_state2string(controld_globals.fsa_state)); + + } else if ((timer == finalization_timer) + && (controld_globals.fsa_state != S_FINALIZE_JOIN)) { + crm_debug("Discarding %s event in state: %s", + fsa_input2string(timer->fsa_input), + fsa_state2string(controld_globals.fsa_state)); + + } else if (timer->fsa_input != I_NULL) { + register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); + } + + controld_trigger_fsa(); + + return TRUE; +} + +bool +controld_init_fsa_timers(void) +{ + transition_timer = calloc(1, sizeof(fsa_timer_t)); + if (transition_timer == NULL) { + return FALSE; + } + + integration_timer = calloc(1, sizeof(fsa_timer_t)); + if (integration_timer == NULL) { + return FALSE; + } + + finalization_timer = calloc(1, sizeof(fsa_timer_t)); + if (finalization_timer == NULL) { + return FALSE; + } + + election_timer = calloc(1, sizeof(fsa_timer_t)); + if (election_timer == NULL) { + return FALSE; + } + + shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); + if (shutdown_escalation_timer == NULL) { + return FALSE; + } + + wait_timer = calloc(1, sizeof(fsa_timer_t)); + if (wait_timer == NULL) { + return FALSE; + } + + recheck_timer = calloc(1, sizeof(fsa_timer_t)); + if (recheck_timer == NULL) { + return FALSE; + } + + election_timer->source_id = 0; + election_timer->period_ms = 0; + election_timer->fsa_input = I_DC_TIMEOUT; + election_timer->callback = crm_timer_popped; + election_timer->log_error = FALSE; + + transition_timer->source_id = 0; + transition_timer->period_ms = 0; + transition_timer->fsa_input = I_PE_CALC; + transition_timer->callback = crm_timer_popped; + transition_timer->log_error = FALSE; + + integration_timer->source_id = 0; + integration_timer->period_ms = 0; + integration_timer->fsa_input = I_INTEGRATED; + integration_timer->callback = crm_timer_popped; + integration_timer->log_error = TRUE; + + finalization_timer->source_id = 0; + finalization_timer->period_ms = 0; + finalization_timer->fsa_input = I_FINALIZED; + finalization_timer->callback = crm_timer_popped; + finalization_timer->log_error = FALSE; + + /* We can't use I_FINALIZED here, because that creates a bug in the join + * process where a joining node can be stuck in S_PENDING while we think it + * is in S_NOT_DC. 
This created an infinite transition loop in which we + * continually send probes which the node NACKs because it's pending. + * + * If we have nodes where the cluster layer is active but the controller is + * not, we can avoid this causing an election/join loop in the integration + * phase. + */ + finalization_timer->fsa_input = I_ELECTION; + + shutdown_escalation_timer->source_id = 0; + shutdown_escalation_timer->period_ms = 0; + shutdown_escalation_timer->fsa_input = I_STOP; + shutdown_escalation_timer->callback = crm_timer_popped; + shutdown_escalation_timer->log_error = TRUE; + + wait_timer->source_id = 0; + wait_timer->period_ms = 2000; + wait_timer->fsa_input = I_NULL; + wait_timer->callback = crm_timer_popped; + wait_timer->log_error = FALSE; + + recheck_timer->source_id = 0; + recheck_timer->period_ms = 0; + recheck_timer->fsa_input = I_PE_CALC; + recheck_timer->callback = crm_timer_popped; + recheck_timer->log_error = FALSE; + + return TRUE; +} + +/*! + * \internal + * \brief Configure timers based on the CIB + * + * \param[in,out] options Name/value pairs for configured options + */ +void +controld_configure_fsa_timers(GHashTable *options) +{ + const char *value = NULL; + + // Election timer + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_DC_DEADTIME); + election_timer->period_ms = crm_parse_interval_spec(value); + + // Integration timer + value = g_hash_table_lookup(options, "join-integration-timeout"); + integration_timer->period_ms = crm_parse_interval_spec(value); + + // Finalization timer + value = g_hash_table_lookup(options, "join-finalization-timeout"); + finalization_timer->period_ms = crm_parse_interval_spec(value); + + // Shutdown escalation timer + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FORCE_QUIT); + shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value); + crm_debug("Shutdown escalation occurs if DC has not responded to request " + "in %ums", shutdown_escalation_timer->period_ms); + + // Transition timer + value = g_hash_table_lookup(options, "transition-delay"); + transition_timer->period_ms = crm_parse_interval_spec(value); + + // Recheck interval + value = g_hash_table_lookup(options, XML_CONFIG_ATTR_RECHECK); + recheck_interval_ms = crm_parse_interval_spec(value); + crm_debug("Re-run scheduler after %ums of inactivity", recheck_interval_ms); +} + +void +controld_free_fsa_timers(void) +{ + controld_stop_timer(transition_timer); + controld_stop_timer(integration_timer); + controld_stop_timer(finalization_timer); + controld_stop_timer(election_timer); + controld_stop_timer(shutdown_escalation_timer); + controld_stop_timer(wait_timer); + controld_stop_timer(recheck_timer); + + free(transition_timer); transition_timer = NULL; + free(integration_timer); integration_timer = NULL; + free(finalization_timer); finalization_timer = NULL; + free(election_timer); election_timer = NULL; + free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; + free(wait_timer); wait_timer = NULL; + free(recheck_timer); recheck_timer = NULL; +} + +/*! + * \internal + * \brief Check whether the transition timer is started + * \return true if the transition timer is started, or false otherwise + */ +bool +controld_is_started_transition_timer(void) +{ + return (transition_timer->period_ms > 0) + && (transition_timer->source_id != 0); +} + +/*! 
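+ * (Editor's note) Worked example for the function below, assuming a + * cluster recheck interval of 15 minutes: a scheduler "recheck by" time + * 30 seconds in the future yields period_ms = 30000; one already in the + * past yields a 500ms retry; and one farther away than the configured + * interval is capped at recheck_interval_ms. + *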
+ * \internal + * \brief Start the recheck timer + */ +void +controld_start_recheck_timer(void) +{ + // Default to recheck interval configured in CIB (if any) + guint period_ms = recheck_interval_ms; + + // If scheduler supplied a "recheck by" time, check whether that's sooner + if (controld_globals.transition_graph->recheck_by > 0) { + time_t diff_seconds = controld_globals.transition_graph->recheck_by + - time(NULL); + + if (diff_seconds < 1) { + // We're already past the desired time + period_ms = 500; + } else { + period_ms = (guint) diff_seconds * 1000; + } + + // Use "recheck by" only if it's sooner than interval from CIB + if (period_ms > recheck_interval_ms) { + period_ms = recheck_interval_ms; + } + } + + if (period_ms > 0) { + recheck_timer->period_ms = period_ms; + controld_start_timer(recheck_timer); + } +} + +/*! + * \internal + * \brief Start the wait timer + */ +void +controld_start_wait_timer(void) +{ + controld_start_timer(wait_timer); +} + +/*! + * \internal + * \brief Stop the recheck timer + * + * \return true if the recheck timer was running, or false otherwise + */ +bool +controld_stop_recheck_timer(void) +{ + return controld_stop_timer(recheck_timer); +} + +/*! + * \brief Get the transition timer's configured period + * \return The transition_timer's period + */ +guint +controld_get_period_transition_timer(void) +{ + return transition_timer->period_ms; +} + +/*! + * \internal + * \brief Reset the election timer's counter to 0 + */ +void +controld_reset_counter_election_timer(void) +{ + election_timer->counter = 0; +} + +/*! + * \internal + * \brief Stop the transition timer + * + * \return true if the transition timer was running, or false otherwise + */ +bool +controld_stop_transition_timer(void) +{ + return controld_stop_timer(transition_timer); +} + +/*! + * \internal + * \brief Start the transition timer + */ +void +controld_start_transition_timer(void) +{ + controld_start_timer(transition_timer); +} + +/*! + * \internal + * \brief Start the countdown sequence for a shutdown + * + * \param[in] default_period_ms Period to use if the shutdown escalation + * timer's period is 0 + */ +void +controld_shutdown_start_countdown(guint default_period_ms) +{ + if (shutdown_escalation_timer->period_ms == 0) { + shutdown_escalation_timer->period_ms = default_period_ms; + } + + crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums", + shutdown_escalation_timer->period_ms); + controld_start_timer(shutdown_escalation_timer); +} diff --git a/daemons/controld/controld_timers.h b/daemons/controld/controld_timers.h new file mode 100644 index 0000000..587f4d1 --- /dev/null +++ b/daemons/controld/controld_timers.h @@ -0,0 +1,36 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CONTROLD_TIMERS__H +# define CONTROLD_TIMERS__H + +# include <stdbool.h> // bool +# include <glib.h> // gboolean, gpointer, guint +# include <controld_fsa.h> // crmd_fsa_input + +bool controld_init_fsa_timers(void); +void controld_free_fsa_timers(void); +void controld_configure_fsa_timers(GHashTable *options); + +bool controld_stop_recheck_timer(void); +bool controld_stop_transition_timer(void); + +void controld_start_recheck_timer(void); +void controld_start_transition_timer(void); +void controld_start_wait_timer(void); + +bool controld_is_started_transition_timer(void); + +guint controld_get_period_transition_timer(void); + +void controld_reset_counter_election_timer(void); + +void controld_shutdown_start_countdown(guint default_period_ms); + +#endif diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c new file mode 100644 index 0000000..c8a342c --- /dev/null +++ b/daemons/controld/controld_transition.c @@ -0,0 +1,197 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include + +#include + +static void +global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output) +{ +} + +static pcmk__graph_t * +create_blank_graph(void) +{ + pcmk__graph_t *a_graph = pcmk__unpack_graph(NULL, NULL); + + a_graph->complete = true; + a_graph->abort_reason = "DC Takeover"; + a_graph->completion_action = pcmk__graph_restart; + return a_graph; +} + +/* A_TE_START, A_TE_STOP, O_TE_RESTART */ +void +do_te_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + cib_t *cib_conn = controld_globals.cib_conn; + gboolean init_ok = TRUE; + + if (pcmk_is_set(action, A_TE_STOP)) { + pcmk__free_graph(controld_globals.transition_graph); + controld_globals.transition_graph = NULL; + + if (cib_conn != NULL) { + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + te_update_diff); + } + + controld_clear_fsa_input_flags(R_TE_CONNECTED); + crm_info("Transitioner is now inactive"); + } + + if ((action & A_TE_START) == 0) { + return; + + } else if (pcmk_is_set(controld_globals.fsa_input_register, + R_TE_CONNECTED)) { + crm_debug("The transitioner is already active"); + return; + + } else if ((action & A_TE_START) && cur_state == S_STOPPING) { + crm_info("Ignoring request to start the transitioner while shutting down"); + return; + } + + if (controld_globals.te_uuid == NULL) { + controld_globals.te_uuid = crm_generate_uuid(); + crm_info("Registering TE UUID: %s", controld_globals.te_uuid); + } + + if (cib_conn == NULL) { + crm_err("Could not set CIB callbacks"); + init_ok = FALSE; + + } else { + if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + te_update_diff) != pcmk_ok) { + crm_err("Could not set CIB notification callback"); + init_ok = FALSE; + } + + if (cib_conn->cmds->set_op_callback(cib_conn, + global_cib_callback) != pcmk_ok) { + crm_err("Could not set CIB global callback"); + init_ok = FALSE; + } + } + + if (init_ok) { + controld_register_graph_functions(); + pcmk__free_graph(controld_globals.transition_graph); + + /* create a blank one */ + crm_debug("Transitioner is now active"); + controld_globals.transition_graph = create_blank_graph(); + controld_set_fsa_input_flags(R_TE_CONNECTED); + } +} + +/* 
A_TE_INVOKE, A_TE_CANCEL */ +void +do_te_invoke(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + + if (!AM_I_DC + || ((controld_globals.fsa_state != S_TRANSITION_ENGINE) + && pcmk_is_set(action, A_TE_INVOKE))) { + crm_notice("No need to invoke the TE (%s) in state %s", + fsa_action2string(action), + fsa_state2string(controld_globals.fsa_state)); + return; + } + + if (action & A_TE_CANCEL) { + crm_debug("Cancelling the transition: %sactive", + controld_globals.transition_graph->complete? "in" : ""); + abort_transition(INFINITY, pcmk__graph_restart, "Peer Cancelled", NULL); + if (!controld_globals.transition_graph->complete) { + crmd_fsa_stall(FALSE); + } + + } else if (action & A_TE_HALT) { + abort_transition(INFINITY, pcmk__graph_wait, "Peer Halt", NULL); + if (!controld_globals.transition_graph->complete) { + crmd_fsa_stall(FALSE); + } + + } else if (action & A_TE_INVOKE) { + ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); + xmlNode *graph_data = input->xml; + const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); + const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); + const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); + + if (graph_file == NULL && graph_data == NULL) { + crm_log_xml_err(input->msg, "Bad command"); + register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + return; + } + + if (!controld_globals.transition_graph->complete) { + crm_info("Another transition is already active"); + abort_transition(INFINITY, pcmk__graph_restart, "Transition Active", + NULL); + return; + } + + if ((controld_globals.fsa_pe_ref == NULL) + || !pcmk__str_eq(controld_globals.fsa_pe_ref, ref, + pcmk__str_none)) { + crm_info("Transition is redundant: %s expected but %s received", + pcmk__s(controld_globals.fsa_pe_ref, "no reference"), + pcmk__s(ref, "no reference")); + abort_transition(INFINITY, pcmk__graph_restart, + "Transition Redundant", NULL); + } + + if (graph_data == NULL && graph_file != NULL) { + graph_data = filename2xml(graph_file); + } + + if (controld_is_started_transition_timer()) { + crm_debug("The transitioner is waiting for a transition timer"); + return; + } + + CRM_CHECK(graph_data != NULL, + crm_err("Input raised by %s is invalid", msg_data->origin); + crm_log_xml_err(input->msg, "Bad command"); + return); + + pcmk__free_graph(controld_globals.transition_graph); + controld_globals.transition_graph = pcmk__unpack_graph(graph_data, + graph_input); + CRM_CHECK(controld_globals.transition_graph != NULL, + controld_globals.transition_graph = create_blank_graph(); + return); + crm_info("Processing graph %d (ref=%s) derived from %s", + controld_globals.transition_graph->id, ref, graph_input); + + te_reset_job_counts(); + + trigger_graph(); + pcmk__log_graph(LOG_TRACE, controld_globals.transition_graph); + + if (graph_data != input->xml) { + free_xml(graph_data); + } + } +} diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h new file mode 100644 index 0000000..2da4221 --- /dev/null +++ b/daemons/controld/controld_transition.h @@ -0,0 +1,63 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef TENGINE__H +# define TENGINE__H + +# include +# include +# include +# include + +/* tengine */ +pcmk__graph_action_t *match_down_event(const char *target); +pcmk__graph_action_t *get_cancel_action(const char *id, const char *node); +bool confirm_cancel_action(const char *id, const char *node_id); + +void controld_record_action_timeout(pcmk__graph_action_t *action); + +void controld_destroy_outside_events_table(void); +void controld_remove_all_outside_events(void); + +gboolean fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node); +void process_graph_event(xmlNode *event, const char *event_node); + +/* utils */ +pcmk__graph_action_t *controld_get_action(int id); +gboolean stop_te_timer(pcmk__graph_action_t *action); +const char *get_rsc_state(const char *task, enum pcmk_exec_status status); + +void process_te_message(xmlNode *msg, xmlNode *xml_data); + +void controld_register_graph_functions(void); + +void notify_crmd(pcmk__graph_t * graph); + +void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data); +gboolean action_timer_callback(gpointer data); +void te_update_diff(const char *event, xmlNode *msg); + +void controld_init_transition_trigger(void); +void controld_destroy_transition_trigger(void); + +void controld_trigger_graph_as(const char *fn, int line); +void abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, + const char *abort_text, guint delay_ms); +void abort_transition_graph(int abort_priority, + enum pcmk__graph_next abort_action, + const char *abort_text, const xmlNode *reason, + const char *fn, int line); + +# define trigger_graph() controld_trigger_graph_as(__func__, __LINE__) +# define abort_transition(pri, action, text, reason) \ + abort_transition_graph(pri, action, text, reason,__func__,__LINE__); + +void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph); +void te_reset_job_counts(void); + +#endif diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c new file mode 100644 index 0000000..4ce09d9 --- /dev/null +++ b/daemons/controld/controld_utils.c @@ -0,0 +1,837 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include // uint64_t + +#include +#include +#include +#include + +#include + +const char * +fsa_input2string(enum crmd_fsa_input input) +{ + const char *inputAsText = NULL; + + switch (input) { + case I_NULL: + inputAsText = "I_NULL"; + break; + case I_CIB_OP: + inputAsText = "I_CIB_OP (unused)"; + break; + case I_CIB_UPDATE: + inputAsText = "I_CIB_UPDATE"; + break; + case I_DC_TIMEOUT: + inputAsText = "I_DC_TIMEOUT"; + break; + case I_ELECTION: + inputAsText = "I_ELECTION"; + break; + case I_PE_CALC: + inputAsText = "I_PE_CALC"; + break; + case I_RELEASE_DC: + inputAsText = "I_RELEASE_DC"; + break; + case I_ELECTION_DC: + inputAsText = "I_ELECTION_DC"; + break; + case I_ERROR: + inputAsText = "I_ERROR"; + break; + case I_FAIL: + inputAsText = "I_FAIL"; + break; + case I_INTEGRATED: + inputAsText = "I_INTEGRATED"; + break; + case I_FINALIZED: + inputAsText = "I_FINALIZED"; + break; + case I_NODE_JOIN: + inputAsText = "I_NODE_JOIN"; + break; + case I_JOIN_OFFER: + inputAsText = "I_JOIN_OFFER"; + break; + case I_JOIN_REQUEST: + inputAsText = "I_JOIN_REQUEST"; + break; + case I_JOIN_RESULT: + inputAsText = "I_JOIN_RESULT"; + break; + case I_NOT_DC: + inputAsText = "I_NOT_DC"; + break; + case I_RECOVERED: + inputAsText = "I_RECOVERED"; + break; + case I_RELEASE_FAIL: + inputAsText = "I_RELEASE_FAIL"; + break; + case I_RELEASE_SUCCESS: + inputAsText = "I_RELEASE_SUCCESS"; + break; + case I_RESTART: + inputAsText = "I_RESTART"; + break; + case I_PE_SUCCESS: + inputAsText = "I_PE_SUCCESS"; + break; + case I_ROUTER: + inputAsText = "I_ROUTER"; + break; + case I_SHUTDOWN: + inputAsText = "I_SHUTDOWN"; + break; + case I_STARTUP: + inputAsText = "I_STARTUP"; + break; + case I_TE_SUCCESS: + inputAsText = "I_TE_SUCCESS"; + break; + case I_STOP: + inputAsText = "I_STOP"; + break; + case I_DC_HEARTBEAT: + inputAsText = "I_DC_HEARTBEAT"; + break; + case I_WAIT_FOR_EVENT: + inputAsText = "I_WAIT_FOR_EVENT"; + break; + case I_LRM_EVENT: + inputAsText = "I_LRM_EVENT"; + break; + case I_PENDING: + inputAsText = "I_PENDING"; + break; + case I_HALT: + inputAsText = "I_HALT"; + break; + case I_TERMINATE: + inputAsText = "I_TERMINATE"; + break; + case I_ILLEGAL: + inputAsText = "I_ILLEGAL"; + break; + } + + if (inputAsText == NULL) { + crm_err("Input %d is unknown", input); + inputAsText = ""; + } + + return inputAsText; +} + +const char * +fsa_state2string(enum crmd_fsa_state state) +{ + const char *stateAsText = NULL; + + switch (state) { + case S_IDLE: + stateAsText = "S_IDLE"; + break; + case S_ELECTION: + stateAsText = "S_ELECTION"; + break; + case S_INTEGRATION: + stateAsText = "S_INTEGRATION"; + break; + case S_FINALIZE_JOIN: + stateAsText = "S_FINALIZE_JOIN"; + break; + case S_NOT_DC: + stateAsText = "S_NOT_DC"; + break; + case S_POLICY_ENGINE: + stateAsText = "S_POLICY_ENGINE"; + break; + case S_RECOVERY: + stateAsText = "S_RECOVERY"; + break; + case S_RELEASE_DC: + stateAsText = "S_RELEASE_DC"; + break; + case S_PENDING: + stateAsText = "S_PENDING"; + break; + case S_STOPPING: + stateAsText = "S_STOPPING"; + break; + case S_TERMINATE: + stateAsText = "S_TERMINATE"; + break; + case S_TRANSITION_ENGINE: + stateAsText = "S_TRANSITION_ENGINE"; + break; + case S_STARTING: + stateAsText = "S_STARTING"; + break; + case S_HALT: + stateAsText = "S_HALT"; + break; + case S_ILLEGAL: + stateAsText = "S_ILLEGAL"; + break; + } + + if (stateAsText == NULL) { + crm_err("State %d is unknown", state); + stateAsText = ""; + } + + return stateAsText; +} + +const char * +fsa_cause2string(enum 
crmd_fsa_cause cause) +{ + const char *causeAsText = NULL; + + switch (cause) { + case C_UNKNOWN: + causeAsText = "C_UNKNOWN"; + break; + case C_STARTUP: + causeAsText = "C_STARTUP"; + break; + case C_IPC_MESSAGE: + causeAsText = "C_IPC_MESSAGE"; + break; + case C_HA_MESSAGE: + causeAsText = "C_HA_MESSAGE"; + break; + case C_TIMER_POPPED: + causeAsText = "C_TIMER_POPPED"; + break; + case C_SHUTDOWN: + causeAsText = "C_SHUTDOWN"; + break; + case C_LRM_OP_CALLBACK: + causeAsText = "C_LRM_OP_CALLBACK"; + break; + case C_CRMD_STATUS_CALLBACK: + causeAsText = "C_CRMD_STATUS_CALLBACK"; + break; + case C_FSA_INTERNAL: + causeAsText = "C_FSA_INTERNAL"; + break; + } + + if (causeAsText == NULL) { + crm_err("Cause %d is unknown", cause); + causeAsText = ""; + } + + return causeAsText; +} + +const char * +fsa_action2string(long long action) +{ + const char *actionAsText = NULL; + + switch (action) { + + case A_NOTHING: + actionAsText = "A_NOTHING"; + break; + case A_ELECTION_START: + actionAsText = "A_ELECTION_START"; + break; + case A_DC_JOIN_FINAL: + actionAsText = "A_DC_JOIN_FINAL"; + break; + case A_READCONFIG: + actionAsText = "A_READCONFIG"; + break; + case O_RELEASE: + actionAsText = "O_RELEASE"; + break; + case A_STARTUP: + actionAsText = "A_STARTUP"; + break; + case A_STARTED: + actionAsText = "A_STARTED"; + break; + case A_HA_CONNECT: + actionAsText = "A_HA_CONNECT"; + break; + case A_HA_DISCONNECT: + actionAsText = "A_HA_DISCONNECT"; + break; + case A_LRM_CONNECT: + actionAsText = "A_LRM_CONNECT"; + break; + case A_LRM_EVENT: + actionAsText = "A_LRM_EVENT"; + break; + case A_LRM_INVOKE: + actionAsText = "A_LRM_INVOKE"; + break; + case A_LRM_DISCONNECT: + actionAsText = "A_LRM_DISCONNECT"; + break; + case O_LRM_RECONNECT: + actionAsText = "O_LRM_RECONNECT"; + break; + case A_CL_JOIN_QUERY: + actionAsText = "A_CL_JOIN_QUERY"; + break; + case A_DC_TIMER_STOP: + actionAsText = "A_DC_TIMER_STOP"; + break; + case A_DC_TIMER_START: + actionAsText = "A_DC_TIMER_START"; + break; + case A_INTEGRATE_TIMER_START: + actionAsText = "A_INTEGRATE_TIMER_START"; + break; + case A_INTEGRATE_TIMER_STOP: + actionAsText = "A_INTEGRATE_TIMER_STOP"; + break; + case A_FINALIZE_TIMER_START: + actionAsText = "A_FINALIZE_TIMER_START"; + break; + case A_FINALIZE_TIMER_STOP: + actionAsText = "A_FINALIZE_TIMER_STOP"; + break; + case A_ELECTION_COUNT: + actionAsText = "A_ELECTION_COUNT"; + break; + case A_ELECTION_VOTE: + actionAsText = "A_ELECTION_VOTE"; + break; + case A_ELECTION_CHECK: + actionAsText = "A_ELECTION_CHECK"; + break; + case A_CL_JOIN_ANNOUNCE: + actionAsText = "A_CL_JOIN_ANNOUNCE"; + break; + case A_CL_JOIN_REQUEST: + actionAsText = "A_CL_JOIN_REQUEST"; + break; + case A_CL_JOIN_RESULT: + actionAsText = "A_CL_JOIN_RESULT"; + break; + case A_DC_JOIN_OFFER_ALL: + actionAsText = "A_DC_JOIN_OFFER_ALL"; + break; + case A_DC_JOIN_OFFER_ONE: + actionAsText = "A_DC_JOIN_OFFER_ONE"; + break; + case A_DC_JOIN_PROCESS_REQ: + actionAsText = "A_DC_JOIN_PROCESS_REQ"; + break; + case A_DC_JOIN_PROCESS_ACK: + actionAsText = "A_DC_JOIN_PROCESS_ACK"; + break; + case A_DC_JOIN_FINALIZE: + actionAsText = "A_DC_JOIN_FINALIZE"; + break; + case A_MSG_PROCESS: + actionAsText = "A_MSG_PROCESS"; + break; + case A_MSG_ROUTE: + actionAsText = "A_MSG_ROUTE"; + break; + case A_RECOVER: + actionAsText = "A_RECOVER"; + break; + case A_DC_RELEASE: + actionAsText = "A_DC_RELEASE"; + break; + case A_DC_RELEASED: + actionAsText = "A_DC_RELEASED"; + break; + case A_DC_TAKEOVER: + actionAsText = "A_DC_TAKEOVER"; + break; + case A_SHUTDOWN: + 
actionAsText = "A_SHUTDOWN"; + break; + case A_SHUTDOWN_REQ: + actionAsText = "A_SHUTDOWN_REQ"; + break; + case A_STOP: + actionAsText = "A_STOP "; + break; + case A_EXIT_0: + actionAsText = "A_EXIT_0"; + break; + case A_EXIT_1: + actionAsText = "A_EXIT_1"; + break; + case O_CIB_RESTART: + actionAsText = "O_CIB_RESTART"; + break; + case A_CIB_START: + actionAsText = "A_CIB_START"; + break; + case A_CIB_STOP: + actionAsText = "A_CIB_STOP"; + break; + case A_TE_INVOKE: + actionAsText = "A_TE_INVOKE"; + break; + case O_TE_RESTART: + actionAsText = "O_TE_RESTART"; + break; + case A_TE_START: + actionAsText = "A_TE_START"; + break; + case A_TE_STOP: + actionAsText = "A_TE_STOP"; + break; + case A_TE_HALT: + actionAsText = "A_TE_HALT"; + break; + case A_TE_CANCEL: + actionAsText = "A_TE_CANCEL"; + break; + case A_PE_INVOKE: + actionAsText = "A_PE_INVOKE"; + break; + case O_PE_RESTART: + actionAsText = "O_PE_RESTART"; + break; + case A_PE_START: + actionAsText = "A_PE_START"; + break; + case A_PE_STOP: + actionAsText = "A_PE_STOP"; + break; + case A_NODE_BLOCK: + actionAsText = "A_NODE_BLOCK"; + break; + case A_UPDATE_NODESTATUS: + actionAsText = "A_UPDATE_NODESTATUS"; + break; + case A_LOG: + actionAsText = "A_LOG "; + break; + case A_ERROR: + actionAsText = "A_ERROR "; + break; + case A_WARN: + actionAsText = "A_WARN "; + break; + /* Composite actions */ + case A_DC_TIMER_START | A_CL_JOIN_QUERY: + actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY"; + break; + } + + if (actionAsText == NULL) { + crm_err("Action %.16llx is unknown", action); + actionAsText = ""; + } + + return actionAsText; +} + +void +fsa_dump_inputs(int log_level, const char *text, long long input_register) +{ + if (input_register == A_NOTHING) { + return; + } + if (text == NULL) { + text = "Input register contents:"; + } + + if (pcmk_is_set(input_register, R_THE_DC)) { + crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC); + } + if (pcmk_is_set(input_register, R_STARTING)) { + crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING); + } + if (pcmk_is_set(input_register, R_SHUTDOWN)) { + crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); + } + if (pcmk_is_set(input_register, R_STAYDOWN)) { + crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); + } + if (pcmk_is_set(input_register, R_JOIN_OK)) { + crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); + } + if (pcmk_is_set(input_register, R_READ_CONFIG)) { + crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG); + } + if (pcmk_is_set(input_register, R_INVOKE_PE)) { + crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); + } + if (pcmk_is_set(input_register, R_CIB_CONNECTED)) { + crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); + } + if (pcmk_is_set(input_register, R_PE_CONNECTED)) { + crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); + } + if (pcmk_is_set(input_register, R_TE_CONNECTED)) { + crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); + } + if (pcmk_is_set(input_register, R_LRM_CONNECTED)) { + crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); + } + if (pcmk_is_set(input_register, R_CIB_REQUIRED)) { + crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); + } + if (pcmk_is_set(input_register, R_PE_REQUIRED)) { + crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); + } + if (pcmk_is_set(input_register, R_TE_REQUIRED)) { + crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); + } + if (pcmk_is_set(input_register, R_REQ_PEND)) { + crm_trace("%s %.16llx (R_REQ_PEND)", text, 
R_REQ_PEND); + } + if (pcmk_is_set(input_register, R_PE_PEND)) { + crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND); + } + if (pcmk_is_set(input_register, R_TE_PEND)) { + crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND); + } + if (pcmk_is_set(input_register, R_RESP_PEND)) { + crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); + } + if (pcmk_is_set(input_register, R_CIB_DONE)) { + crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); + } + if (pcmk_is_set(input_register, R_HAVE_CIB)) { + crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); + } + if (pcmk_is_set(input_register, R_MEMBERSHIP)) { + crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP); + } + if (pcmk_is_set(input_register, R_PEER_DATA)) { + crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); + } + if (pcmk_is_set(input_register, R_IN_RECOVERY)) { + crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); + } +} + +void +fsa_dump_actions(uint64_t action, const char *text) +{ + if (pcmk_is_set(action, A_READCONFIG)) { + crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); + } + if (pcmk_is_set(action, A_STARTUP)) { + crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text); + } + if (pcmk_is_set(action, A_STARTED)) { + crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text); + } + if (pcmk_is_set(action, A_HA_CONNECT)) { + crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); + } + if (pcmk_is_set(action, A_HA_DISCONNECT)) { + crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); + } + if (pcmk_is_set(action, A_LRM_CONNECT)) { + crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); + } + if (pcmk_is_set(action, A_LRM_EVENT)) { + crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); + } + if (pcmk_is_set(action, A_LRM_INVOKE)) { + crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); + } + if (pcmk_is_set(action, A_LRM_DISCONNECT)) { + crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); + } + if (pcmk_is_set(action, A_DC_TIMER_STOP)) { + crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_DC_TIMER_START)) { + crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); + } + if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) { + crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); + } + if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) { + crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) { + crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); + } + if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) { + crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); + } + if (pcmk_is_set(action, A_ELECTION_COUNT)) { + crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); + } + if (pcmk_is_set(action, A_ELECTION_VOTE)) { + crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); + } + if (pcmk_is_set(action, A_ELECTION_CHECK)) { + crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text); + } + if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) { + crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text); + } + if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) { + crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); + } + if (pcmk_is_set(action, 
A_CL_JOIN_RESULT)) { + crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); + } + if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) { + crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); + } + if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) { + crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); + } + if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) { + crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); + } + if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) { + crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); + } + if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) { + crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); + } + if (pcmk_is_set(action, A_MSG_PROCESS)) { + crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); + } + if (pcmk_is_set(action, A_MSG_ROUTE)) { + crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); + } + if (pcmk_is_set(action, A_RECOVER)) { + crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text); + } + if (pcmk_is_set(action, A_DC_RELEASE)) { + crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); + } + if (pcmk_is_set(action, A_DC_RELEASED)) { + crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); + } + if (pcmk_is_set(action, A_DC_TAKEOVER)) { + crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); + } + if (pcmk_is_set(action, A_SHUTDOWN)) { + crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); + } + if (pcmk_is_set(action, A_SHUTDOWN_REQ)) { + crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); + } + if (pcmk_is_set(action, A_STOP)) { + crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text); + } + if (pcmk_is_set(action, A_EXIT_0)) { + crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); + } + if (pcmk_is_set(action, A_EXIT_1)) { + crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); + } + if (pcmk_is_set(action, A_CIB_START)) { + crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text); + } + if (pcmk_is_set(action, A_CIB_STOP)) { + crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); + } + if (pcmk_is_set(action, A_TE_INVOKE)) { + crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); + } + if (pcmk_is_set(action, A_TE_START)) { + crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text); + } + if (pcmk_is_set(action, A_TE_STOP)) { + crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); + } + if (pcmk_is_set(action, A_TE_CANCEL)) { + crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); + } + if (pcmk_is_set(action, A_PE_INVOKE)) { + crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); + } + if (pcmk_is_set(action, A_PE_START)) { + crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text); + } + if (pcmk_is_set(action, A_PE_STOP)) { + crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); + } + if (pcmk_is_set(action, A_NODE_BLOCK)) { + crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); + } + if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) { + crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); + } + if (pcmk_is_set(action, A_LOG)) { + crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text); + } + if (pcmk_is_set(action, A_ERROR)) { + crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text); + } + if (pcmk_is_set(action, A_WARN)) { + crm_trace("Action %.16llx (A_WARN ) %s", 
A_WARN, text); + } +} + +gboolean +update_dc(xmlNode * msg) +{ + char *last_dc = controld_globals.dc_name; + const char *dc_version = NULL; + const char *welcome_from = NULL; + + if (msg != NULL) { + gboolean invalid = FALSE; + + dc_version = crm_element_value(msg, F_CRM_VERSION); + welcome_from = crm_element_value(msg, F_CRM_HOST_FROM); + + CRM_CHECK(dc_version != NULL, return FALSE); + CRM_CHECK(welcome_from != NULL, return FALSE); + + if (AM_I_DC + && !pcmk__str_eq(welcome_from, controld_globals.our_nodename, + pcmk__str_casei)) { + invalid = TRUE; + + } else if ((controld_globals.dc_name != NULL) + && !pcmk__str_eq(welcome_from, controld_globals.dc_name, + pcmk__str_casei)) { + invalid = TRUE; + } + + if (invalid) { + if (AM_I_DC) { + crm_err("Not updating DC to %s (%s): we are also a DC", + welcome_from, dc_version); + } else { + crm_warn("New DC %s is not %s", + welcome_from, controld_globals.dc_name); + } + + controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START); + controld_trigger_fsa(); + return FALSE; + } + } + + controld_globals.dc_name = NULL; // freed as last_dc + pcmk__str_update(&(controld_globals.dc_name), welcome_from); + pcmk__str_update(&(controld_globals.dc_version), dc_version); + + if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) { + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { + crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, + pcmk__s(controld_globals.dc_version, "unknown version")); + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER); + + } else if (last_dc != NULL) { + crm_info("Unset DC (was %s)", last_dc); + } + + free(last_dc); + return TRUE; +} + +void crmd_peer_down(crm_node_t *peer, bool full) +{ + if(full && peer->state == NULL) { + pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0); + crm_update_peer_proc(__func__, peer, crm_proc_none, NULL); + } + crm_update_peer_join(__func__, peer, crm_join_none); + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); +} + +/*! + * \internal + * \brief Check feature set compatibility of DC and joining node + * + * Return true if a joining node's CRM feature set is compatible with the + * current DC's. The feature sets are compatible if they have the same major + * version number, and the DC's minor version number is the same or older than + * the joining node's. The minor-minor version is intended solely to allow + * resource agents to detect feature support, and so is ignored. 
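+ * + * For example, a DC at feature set "3.15.1" is compatible with a joining + * node at "3.15.7" or "3.16.0" (same major version, DC minor version not + * newer), but not with "3.14.2" or "4.0.0" (the version strings here are + * illustrative only).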
+ * + * \param[in] dc_version DC's feature set + * \param[in] join_version Joining node's version + */ +bool +feature_set_compatible(const char *dc_version, const char *join_version) +{ + char *dc_minor = NULL; + char *join_minor = NULL; + long dc_v = 0; + long join_v = 0; + + // Get DC's major version + errno = 0; + dc_v = strtol(dc_version, &dc_minor, 10); + if (errno) { + return FALSE; + } + + // Get joining node's major version + errno = 0; + join_v = strtol(join_version, &join_minor, 10); + if (errno) { + return FALSE; + } + + // Major version component must be identical + if (dc_v != join_v) { + return FALSE; + } + + // Get DC's minor version + if (*dc_minor == '.') { + ++dc_minor; + } + errno = 0; + dc_v = strtol(dc_minor, NULL, 10); + if (errno) { + return FALSE; + } + + // Get joining node's minor version + if (*join_minor == '.') { + ++join_minor; + } + errno = 0; + join_v = strtol(join_minor, NULL, 10); + if (errno) { + return FALSE; + } + + // DC's minor version must be the same or older + return dc_v <= join_v; +} + +const char * +get_node_id(xmlNode *lrm_rsc_op) +{ + xmlNode *node = lrm_rsc_op; + + while (node != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(node), pcmk__str_casei)) { + node = node->parent; + } + + CRM_CHECK(node != NULL, return NULL); + return ID(node); +} diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h new file mode 100644 index 0000000..6ce413d --- /dev/null +++ b/daemons/controld/controld_utils.h @@ -0,0 +1,61 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CRMD_UTILS__H +# define CRMD_UTILS__H + +# include +# include + +# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + +enum node_update_flags { + node_update_none = 0x0000, + node_update_quick = 0x0001, + node_update_cluster = 0x0010, + node_update_peer = 0x0020, + node_update_join = 0x0040, + node_update_expected = 0x0100, + node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected, +}; + +crm_exit_t crmd_exit(crm_exit_t exit_code); +_Noreturn void crmd_fast_exit(crm_exit_t exit_code); +void controld_shutdown_schedulerd_ipc(void); +void controld_stop_sched_timer(void); +void controld_free_sched_timer(void); +void controld_expect_sched_reply(char *ref); + +void fsa_dump_actions(uint64_t action, const char *text); +void fsa_dump_inputs(int log_level, const char *text, long long input_register); + +gboolean update_dc(xmlNode * msg); +void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); +xmlNode *create_node_state_update(crm_node_t *node, int flags, + xmlNode *parent, const char *source); +void populate_cib_nodes(enum node_update_flags flags, const char *source); +void crm_update_quorum(gboolean quorum, gboolean force_update); +void controld_close_attrd_ipc(void); +void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); +void update_attrd_list(GList *attrs, uint32_t opts); +void update_attrd_remote_node_removed(const char *host, const char *user_name); +void update_attrd_clear_failures(const char *host, const char *rsc, + const char *op, const char *interval_spec, + gboolean is_remote_node); + +int crmd_join_phase_count(enum crm_join_phase phase); +void crmd_join_phase_log(int level); + +void crmd_peer_down(crm_node_t *peer, bool full); + +bool feature_set_compatible(const char *dc_version, const char *join_version); + +const char *get_node_id(xmlNode *lrm_rsc_op); + +#endif diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c new file mode 100644 index 0000000..5858898 --- /dev/null +++ b/daemons/controld/pacemaker-controld.c @@ -0,0 +1,205 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \ + "to events" + +_Noreturn void crmd_init(void); +extern void init_dotfile(void); + +controld_globals_t controld_globals = { + // Automatic initialization to 0, false, or NULL is fine for most members + .fsa_state = S_STARTING, + .fsa_actions = A_NOTHING, +}; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) +{ + return pcmk__build_arg_context(args, "text (default), xml", group, + "[metadata]"); +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_exit_t exit_code = CRM_EX_OK; + bool initialize = true; + + crm_ipc_t *old_instance = NULL; + + pcmk__output_t *out = NULL; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + + crm_log_preinit(NULL, argc, argv); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + initialize = false; + goto done; + } + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { + crmd_metadata(); + initialize = false; + goto done; + } + + pcmk__cli_init_logging("pacemaker-controld", args->verbosity); + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker controller"); + + old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); + if (old_instance == NULL) { + /* crm_ipc_new will have already printed an error message with crm_err. 
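+ * + * (old_instance exists only to probe for a running controller: the + * crm_ipc_connect() check that follows is the usual single-instance guard, + * aborting startup if a pacemaker-controld IPC endpoint already answers.)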
*/ + exit_code = CRM_EX_FATAL; + goto done; + } + + if (crm_ipc_connect(old_instance)) { + /* IPC end-point already up */ + crm_ipc_close(old_instance); + crm_ipc_destroy(old_instance); + crm_err("pacemaker-controld is already active, aborting startup"); + initialize = false; + goto done; + + } else { + /* not up or not authentic, we'll proceed either way */ + crm_ipc_destroy(old_instance); + old_instance = NULL; + } + + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + exit_code = CRM_EX_FATAL; + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Bad permissions on " PE_STATE_DIR + " (see logs for details)"); + goto done; + + } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { + exit_code = CRM_EX_FATAL; + crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Bad permissions on " CRM_CONFIG_DIR + " (see logs for details)"); + goto done; + } + + if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) { + exit_code = CRM_EX_FATAL; + goto done; + } + + pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + + if ((exit_code == CRM_EX_OK) && initialize) { + // Does not return + crmd_init(); + } + crm_exit(exit_code); +} + +void +crmd_init(void) +{ + crm_exit_t exit_code = CRM_EX_OK; + enum crmd_fsa_state state; + + init_dotfile(); + register_fsa_input(C_STARTUP, I_STARTUP, NULL); + + crm_peer_init(); + state = s_crmd_fsa(C_STARTUP); + + if (state == S_PENDING || state == S_STARTING) { + /* Create the mainloop and run it... */ + crm_trace("Starting %s's mainloop", crm_system_name); + controld_globals.mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(controld_globals.mainloop); + if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { + crm_info("Inhibiting automated respawn"); + exit_code = CRM_EX_FATAL; + } + + } else { + crm_err("Startup of %s failed. Current state: %s", + crm_system_name, fsa_state2string(state)); + exit_code = CRM_EX_ERROR; + } + + crm_info("%s[%lu] exiting with status %d (%s)", + crm_system_name, (unsigned long) getpid(), exit_code, + crm_exit_str(exit_code)); + + crmd_fast_exit(exit_code); +} diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h new file mode 100644 index 0000000..1484a00 --- /dev/null +++ b/daemons/controld/pacemaker-controld.h @@ -0,0 +1,39 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef CRMD__H +# define CRMD__H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +# define controld_trigger_config() \ + controld_trigger_config_as(__func__, __LINE__) + +void crmd_metadata(void); +void controld_trigger_config_as(const char *fn, int line); +void controld_election_init(const char *uname); +void controld_configure_election(GHashTable *options); +void controld_remove_voter(const char *uname); +void controld_election_fini(void); +void controld_stop_current_election_timeout(void); + +#endif diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am new file mode 100644 index 0000000..466f0df --- /dev/null +++ b/daemons/execd/Makefile.am @@ -0,0 +1,76 @@ +# +# Copyright 2012-2021 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU Lesser General Public License +# version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +halibdir = $(CRM_DAEMON_DIR) + +halib_PROGRAMS = pacemaker-execd cts-exec-helper + +EXTRA_DIST = pacemaker-remoted.8.inc + +pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_execd_SOURCES = pacemaker-execd.c execd_commands.c \ + execd_alerts.c + +if BUILD_REMOTE +sbin_PROGRAMS = pacemaker-remoted +if BUILD_SYSTEMD +systemdsystemunit_DATA = pacemaker_remote.service +else +initdir = $(INITDIR) +init_SCRIPTS = pacemaker_remote +endif + +pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE $(AM_CPPFLAGS) + +pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \ + $(top_builddir)/lib/lrmd/liblrmd.la +pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ + remoted_tls.c remoted_pidone.c remoted_proxy.c +endif + +cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/pengine/libpe_status.la +cts_exec_helper_SOURCES = cts-exec-helper.c + +noinst_HEADERS = pacemaker-execd.h + +CLEANFILES = $(man8_MANS) + +# Always create a symlink for the old pacemaker_remoted name, so that bundle +# container images using a current Pacemaker will run on cluster nodes running +# Pacemaker 1 (>=1.1.17). +install-exec-hook: +if BUILD_LEGACY_LINKS + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd && $(LN_S) pacemaker-execd lrmd +endif +if BUILD_REMOTE + cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted && $(LN_S) pacemaker-remoted pacemaker_remoted +endif + +uninstall-hook: +if BUILD_LEGACY_LINKS + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd +endif +if BUILD_REMOTE + cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted +endif diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c new file mode 100644 index 0000000..2af5e16 --- /dev/null +++ b/daemons/execd/cts-exec-helper.c @@ -0,0 +1,624 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events" + +static int exec_call_id = 0; +static gboolean start_test(gpointer user_data); +static void try_connect(void); + +static char *key = NULL; +static char *val = NULL; + +static struct { + int verbose; + int quiet; + guint interval_ms; + int timeout; + int start_delay; + int cancel_call_id; + gboolean no_wait; + gboolean is_running; + gboolean no_connect; + int exec_call_opts; + const char *api_call; + const char *rsc_id; + const char *provider; + const char *class; + const char *type; + const char *action; + const char *listen; + gboolean use_tls; + lrmd_key_value_t *params; +} options; + +static gboolean +interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { + options.interval_ms = crm_parse_interval_spec(optarg); + return errno == 0; +} + +static gboolean +notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { + if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) { + options.exec_call_opts = lrmd_opt_notify_orig_only; + } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) { + options.exec_call_opts = lrmd_opt_notify_changes_only; + } + + return TRUE; +} + +static gboolean +param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { + if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) { + pcmk__str_update(&key, optarg); + } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) { + pcmk__str_update(&val, optarg); + } + + if (key != NULL && val != NULL) { + options.params = lrmd_key_value_add(options.params, key, val); + pcmk__str_update(&key, NULL); + pcmk__str_update(&val, NULL); + } + + return TRUE; +} + +static GOptionEntry basic_entries[] = { + { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call, + "Directly relates to executor API functions", + NULL }, + + { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running, + "Determine if a resource is registered and running", + NULL }, + + { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen, + "Listen for a specific event string", + NULL }, + + { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait, + "Make api call and do not wait for result", + NULL }, + + { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, + "Only notify client changes to recurring operations", + NULL }, + + { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, + "Only notify this client of the results of an API action", + NULL }, + + { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls, + "Use TLS backend for local connection", + NULL }, + + { NULL } +}; + +static GOptionEntry api_call_entries[] = { + { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action, + NULL, NULL }, + + { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id, + NULL, NULL }, + + { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class, + NULL, NULL }, + + { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb, + NULL, NULL }, + + { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, + NULL, NULL }, + + { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, + 
NULL, NULL }, + + { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider, + NULL, NULL }, + + { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id, + NULL, NULL }, + + { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay, + NULL, NULL }, + + { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout, + NULL, NULL }, + + { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type, + NULL, NULL }, + + { NULL } +}; + +static GMainLoop *mainloop = NULL; +static lrmd_t *lrmd_conn = NULL; + +static char event_buf_v0[1024]; + +static crm_exit_t +test_exit(crm_exit_t exit_code) +{ + lrmd_api_delete(lrmd_conn); + return crm_exit(exit_code); +} + +#define print_result(fmt, args...) \ + if (!options.quiet) { \ + printf(fmt "\n" , ##args); \ + } + +#define report_event(event) \ + snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ + lrmd_event_type2str(event->type), \ + event->rsc_id, \ + event->op_type ? event->op_type : "none", \ + services_ocf_exitcode_str(event->rc), \ + pcmk_exec_status_str(event->op_status)); \ + crm_info("%s", event_buf_v0); + +static void +test_shutdown(int nsig) +{ + lrmd_api_delete(lrmd_conn); + lrmd_conn = NULL; +} + +static void +read_events(lrmd_event_data_t * event) +{ + report_event(event); + if (options.listen) { + if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) { + print_result("LISTEN EVENT SUCCESSFUL"); + test_exit(CRM_EX_OK); + } + } + + if (exec_call_id && (event->call_id == exec_call_id)) { + if (event->op_status == 0 && event->rc == 0) { + print_result("API-CALL SUCCESSFUL for 'exec'"); + } else { + print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s", + event->rc, pcmk_exec_status_str(event->op_status)); + test_exit(CRM_EX_ERROR); + } + + if (!options.listen) { + test_exit(CRM_EX_OK); + } + } +} + +static gboolean +timeout_err(gpointer data) +{ + print_result("LISTEN EVENT FAILURE - timeout occurred, never found"); + test_exit(CRM_EX_TIMEOUT); + return FALSE; +} + +static void +connection_events(lrmd_event_data_t * event) +{ + int rc = event->connection_rc; + + if (event->type != lrmd_event_connect) { + /* ignore */ + return; + } + + if (!rc) { + crm_info("Executor client connection established"); + start_test(NULL); + return; + } else { + sleep(1); + try_connect(); + crm_notice("Executor client connection failed"); + } +} + +static void +try_connect(void) +{ + int tries = 10; + static int num_tries = 0; + int rc = 0; + + lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); + for (; num_tries < tries; num_tries++) { + rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000); + + if (!rc) { + return; /* we'll hear back in async callback */ + } + sleep(1); + } + + print_result("API CONNECTION FAILURE"); + test_exit(CRM_EX_ERROR); +} + +static gboolean +start_test(gpointer user_data) +{ + int rc = 0; + + if (!options.no_connect) { + if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { + try_connect(); + /* async connect -- this function will get called back into */ + return 0; + } + } + lrmd_conn->cmds->set_callback(lrmd_conn, read_events); + + if (options.timeout) { + g_timeout_add(options.timeout, timeout_err, NULL); + } + + if (!options.api_call) { + return 0; + } + + if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) { + rc = lrmd_conn->cmds->exec(lrmd_conn, + options.rsc_id, + options.action, + NULL, + options.interval_ms, + options.timeout, + options.start_delay, + options.exec_call_opts, + options.params); + + if (rc > 0) { + 
exec_call_id = rc; + print_result("API-CALL 'exec' action pending, waiting on response"); + } + + } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) { + rc = lrmd_conn->cmds->register_rsc(lrmd_conn, + options.rsc_id, + options.class, options.provider, options.type, 0); + } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) { + lrmd_rsc_info_t *rsc_info; + + rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); + + if (rsc_info) { + print_result("RSC_INFO: id:%s class:%s provider:%s type:%s", + rsc_info->id, rsc_info->standard, + (rsc_info->provider? rsc_info->provider : ""), + rsc_info->type); + lrmd_free_rsc_info(rsc_info); + rc = pcmk_ok; + } else { + rc = -1; + } + } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) { + rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); + } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) { + rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, + options.interval_ms); + } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) { + char *output = NULL; + + rc = lrmd_conn->cmds->get_metadata(lrmd_conn, + options.class, + options.provider, options.type, &output, 0); + if (rc == pcmk_ok) { + print_result("%s", output); + free(output); + } + } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) { + lrmd_list_t *list = NULL; + lrmd_list_t *iter = NULL; + + rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); + + if (rc > 0) { + print_result("%d agents found", rc); + for (iter = list; iter != NULL; iter = iter->next) { + print_result("%s", iter->val); + } + lrmd_list_freeall(list); + rc = 0; + } else { + print_result("API_CALL FAILURE - no agents found"); + rc = -1; + } + } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) { + lrmd_list_t *list = NULL; + lrmd_list_t *iter = NULL; + + rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); + + if (rc > 0) { + print_result("%d providers found", rc); + for (iter = list; iter != NULL; iter = iter->next) { + print_result("%s", iter->val); + } + lrmd_list_freeall(list); + rc = 0; + } else { + print_result("API_CALL FAILURE - no providers found"); + rc = -1; + } + + } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) { + lrmd_list_t *list = NULL; + lrmd_list_t *iter = NULL; + + rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); + + if (rc > 0) { + print_result("%d standards found", rc); + for (iter = list; iter != NULL; iter = iter->next) { + print_result("%s", iter->val); + } + lrmd_list_freeall(list); + rc = 0; + } else { + print_result("API_CALL FAILURE - no standards found"); + rc = -1; + } + + } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) { + GList *op_list = NULL; + GList *op_item = NULL; + rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, + &op_list); + + for (op_item = op_list; op_item != NULL; op_item = op_item->next) { + lrmd_op_info_t *op_info = op_item->data; + + print_result("RECURRING_OP: %s_%s_%s timeout=%sms", + op_info->rsc_id, op_info->action, + op_info->interval_ms_s, op_info->timeout_ms_s); + lrmd_free_op_info(op_info); + } + g_list_free(op_list); + + } else if (options.api_call) { + print_result("API-CALL FAILURE unknown API call '%s'", options.api_call); + test_exit(CRM_EX_ERROR); + } + + if (rc < 0) { + print_result("API-CALL FAILURE for '%s'
api_rc:%d", + options.api_call, rc); + test_exit(CRM_EX_ERROR); + } + + if (options.api_call && rc == pcmk_ok) { + print_result("API-CALL SUCCESSFUL for '%s'", options.api_call); + if (!options.listen) { + test_exit(CRM_EX_OK); + } + } + + if (options.no_wait) { + /* just make the call and exit regardless of anything else. */ + test_exit(CRM_EX_OK); + } + + return 0; +} + +/*! + * \internal + * \brief Generate resource parameters from CIB if none explicitly given + * + * \return Standard Pacemaker return code + */ +static int +generate_params(void) +{ + int rc = pcmk_rc_ok; + pe_working_set_t *data_set = NULL; + xmlNode *cib_xml_copy = NULL; + pe_resource_t *rsc = NULL; + GHashTable *params = NULL; + GHashTable *meta = NULL; + GHashTableIter iter; + char *key = NULL; + char *value = NULL; + + if (options.params != NULL) { + return pcmk_rc_ok; // User specified parameters explicitly + } + + // Retrieve and update CIB + rc = cib__signon_query(NULL, NULL, &cib_xml_copy); + if (rc != pcmk_rc_ok) { + return rc; + } + if (!cli_config_update(&cib_xml_copy, NULL, FALSE)) { + crm_err("Could not update CIB"); + return pcmk_rc_cib_corrupt; + } + + // Calculate cluster status + data_set = pe_new_working_set(); + if (data_set == NULL) { + crm_crit("Could not allocate working set"); + return ENOMEM; + } + pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat); + data_set->input = cib_xml_copy; + data_set->now = crm_time_new(NULL); + cluster_status(data_set); + + // Find resource in CIB + rsc = pe_find_resource_with_flags(data_set->resources, options.rsc_id, + pe_find_renamed|pe_find_any); + if (rsc == NULL) { + crm_err("Resource does not exist in config"); + pe_free_working_set(data_set); + return EINVAL; + } + + // Add resource instance parameters to options.params + params = pe_rsc_params(rsc, NULL, data_set); + if (params != NULL) { + g_hash_table_iter_init(&iter, params); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + options.params = lrmd_key_value_add(options.params, key, value); + } + } + + // Add resource meta-attributes to options.params + meta = pcmk__strkey_table(free, free); + get_meta_attributes(meta, rsc, NULL, data_set); + g_hash_table_iter_init(&iter, meta); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &value)) { + char *crm_name = crm_meta_name(key); + + options.params = lrmd_key_value_add(options.params, crm_name, value); + free(crm_name); + } + g_hash_table_destroy(meta); + + pe_free_working_set(data_set); + return rc; +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, NULL, group, NULL); + + pcmk__add_main_args(context, basic_entries); + pcmk__add_arg_group(context, "api-call", "API Call Options:", + "Parameters for api-call option", api_call_entries); + + return context; +} + +int +main(int argc, char **argv) +{ + GError *error = NULL; + crm_exit_t exit_code = CRM_EX_OK; + crm_trigger_t *trig = NULL; + + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + /* Typically we'd pass all the single character options that take an argument + * as the second parameter here (and there's a bunch of those in this tool). + * However, we control how this program is called so we can just not call it + * in a way where the preprocessing ever matters. 
+ */ + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, NULL); + + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + /* We have to use crm_log_init here to set up the logging because there's + * different handling for daemons vs. command line programs, and + * pcmk__cli_init_logging is set up to only handle the latter. + */ + crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc, + argv, FALSE); + + for (int i = 0; i < args->verbosity; i++) { + crm_bump_log_level(argc, argv); + } + + if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents", + "list_standards", "list_ocf_providers", NULL)) { + options.no_connect = TRUE; + } + + if (options.is_running) { + int rc = pcmk_rc_ok; + + if (options.rsc_id == NULL) { + exit_code = CRM_EX_USAGE; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "--is-running requires --rsc-id"); + goto done; + } + + options.interval_ms = 0; + if (options.timeout == 0) { + options.timeout = 30000; + } + + rc = generate_params(); + if (rc != pcmk_rc_ok) { + exit_code = pcmk_rc2exitc(rc); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Can not determine resource status: " + "unable to get parameters from CIB"); + goto done; + } + options.api_call = "exec"; + options.action = "monitor"; + options.exec_call_opts = lrmd_opt_notify_orig_only; + } + + if (!options.api_call && !options.listen) { + exit_code = CRM_EX_USAGE; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Must specify at least one of --api-call, --listen, " + "or --is-running"); + goto done; + } + + if (options.use_tls) { + lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); + } else { + lrmd_conn = lrmd_api_new(); + } + trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); + mainloop_set_trigger(trig); + mainloop_add_signal(SIGTERM, test_shutdown); + + crm_info("Starting"); + mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + free(key); + free(val); + + pcmk__output_and_clear_error(&error, NULL); + return test_exit(exit_code); +} diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c new file mode 100644 index 0000000..5944d93 --- /dev/null +++ b/daemons/execd/execd_alerts.c @@ -0,0 +1,205 @@ +/* + * Copyright 2016-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pacemaker-execd.h" + +/* Track in-flight alerts so we can wait for them at shutdown */ +static GHashTable *inflight_alerts; /* key = call_id, value = timeout */ +static gboolean draining_alerts = FALSE; + +static inline void +add_inflight_alert(int call_id, int timeout) +{ + if (inflight_alerts == NULL) { + inflight_alerts = pcmk__intkey_table(NULL); + } + pcmk__intkey_table_insert(inflight_alerts, call_id, + GINT_TO_POINTER(timeout)); +} + +static inline void +remove_inflight_alert(int call_id) +{ + if (inflight_alerts != NULL) { + pcmk__intkey_table_remove(inflight_alerts, call_id); + } +} + +static int +max_inflight_timeout(void) +{ + GHashTableIter iter; + gpointer timeout; + int max_timeout = 0; + + if (inflight_alerts) { + g_hash_table_iter_init(&iter, inflight_alerts); + while (g_hash_table_iter_next(&iter, NULL, &timeout)) { + if (GPOINTER_TO_INT(timeout) > max_timeout) { + max_timeout = GPOINTER_TO_INT(timeout); + } + } + } + return max_timeout; +} + +struct alert_cb_s { + char *client_id; + int call_id; +}; + +static void +alert_complete(svc_action_t *action) +{ + struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data); + + CRM_CHECK(cb_data != NULL, return); + + remove_inflight_alert(cb_data->call_id); + + if (action->status != PCMK_EXEC_DONE) { + const char *reason = services__exit_reason(action); + + crm_notice("Could not send alert: %s%s%s%s " CRM_XS " client=%s", + pcmk_exec_status_str(action->status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? "" : ")", + cb_data->client_id); + + } else if (action->rc != 0) { + crm_notice("Alert [%d] completed but exited with status %d " + CRM_XS " client=%s", + action->pid, action->rc, cb_data->client_id); + + } else { + crm_debug("Alert [%d] completed " CRM_XS " client=%s", + action->pid, cb_data->client_id); + } + + free(cb_data->client_id); + free(action->cb_data); + action->cb_data = NULL; +} + +int +process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) +{ + static int alert_sequence_no = 0; + + xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR); + const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID); + const char *alert_path = crm_element_value(alert_xml, F_LRMD_ALERT_PATH); + svc_action_t *action = NULL; + int alert_timeout = 0; + int rc = pcmk_ok; + GHashTable *params = NULL; + struct alert_cb_s *cb_data = NULL; + + if ((alert_id == NULL) || (alert_path == NULL) || + (client == NULL) || (client->id == NULL)) { /* hint static analyzer */ + return -EINVAL; + } + if (draining_alerts) { + return pcmk_ok; + } + + crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &alert_timeout); + + crm_info("Executing alert %s for %s", alert_id, client->id); + + params = xml2list(alert_xml); + pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence, + ++alert_sequence_no); + + cb_data = calloc(1, sizeof(struct alert_cb_s)); + if (cb_data == NULL) { + rc = -errno; + goto err; + } + + /* coverity[deref_ptr] False Positive */ + cb_data->client_id = strdup(client->id); + if (cb_data->client_id == NULL) { + rc = -errno; + goto err; + } + + crm_element_value_int(request, F_LRMD_CALLID, &(cb_data->call_id)); + + action = services_alert_create(alert_id, alert_path, alert_timeout, params, + alert_sequence_no, cb_data); + if (action->rc != PCMK_OCF_UNKNOWN) { + rc = -E2BIG; + goto err; + } + + rc = services_action_user(action, 
CRM_DAEMON_USER); + if (rc < 0) { + goto err; + } + + add_inflight_alert(cb_data->call_id, alert_timeout); + if (services_alert_async(action, alert_complete) == FALSE) { + services_action_free(action); + } + return pcmk_ok; + +err: + if (cb_data) { + if (cb_data->client_id) { + free(cb_data->client_id); + } + free(cb_data); + } + services_action_free(action); + return rc; +} + +static bool +drain_check(guint remaining_timeout_ms) +{ + if (inflight_alerts != NULL) { + guint count = g_hash_table_size(inflight_alerts); + + if (count > 0) { + crm_trace("%d alerts pending (%.3fs timeout remaining)", + count, remaining_timeout_ms / 1000.0); + return TRUE; + } + } + return FALSE; +} + +void +lrmd_drain_alerts(GMainLoop *mloop) +{ + if (inflight_alerts != NULL) { + guint timer_ms = max_inflight_timeout() + 5000; + + crm_trace("Draining in-flight alerts (timeout %.3fs)", + timer_ms / 1000.0); + draining_alerts = TRUE; + pcmk_drain_main_loop(mloop, timer_ms, drain_check); + g_hash_table_destroy(inflight_alerts); + inflight_alerts = NULL; + } +} diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c new file mode 100644 index 0000000..fa2761e --- /dev/null +++ b/daemons/execd/execd_commands.c @@ -0,0 +1,1927 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include +#include + +#include + +// Check whether we have a high-resolution monotonic clock +#undef PCMK__TIME_USE_CGT +#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) +# define PCMK__TIME_USE_CGT +# include /* clock_gettime */ +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pacemaker-execd.h" + +GHashTable *rsc_list = NULL; + +typedef struct lrmd_cmd_s { + int timeout; + guint interval_ms; + int start_delay; + int timeout_orig; + + int call_id; + + int call_opts; + /* Timer ids, must be removed on cmd destruction. */ + int delay_id; + int stonith_recurring_id; + + int rsc_deleted; + + int service_flags; + + char *client_id; + char *origin; + char *rsc_id; + char *action; + char *real_action; + char *userdata_str; + + pcmk__action_result_t result; + + /* We can track operation queue time and run time, to be saved with the CIB + * resource history (and displayed in cluster status). We need + * high-resolution monotonic time for this purpose, so we use + * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature + * is disabled). + * + * However, we also need epoch timestamps for recording the time the command + * last ran and the time its return value last changed, for use in time + * displays (as opposed to interval calculations). We keep time_t values for + * this purpose. + * + * The last run time is used for both purposes, so we keep redundant + * monotonic and epoch values for this. Technically the two could represent + * different times, but since time_t has only second resolution and the + * values are used for distinct purposes, that is not significant. + */ +#ifdef PCMK__TIME_USE_CGT + /* Recurring and systemd operations may involve more than one executor + * command per operation, so they need info about the original and the most + * recent. 
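+ * + * For example, a systemd start tracked by several follow-up monitors keeps + * t_first_run/t_first_queue fixed at the original command while t_run and + * t_queue move with each monitor, so the reported times can cover either + * the whole operation or just the most recent command.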
+ */ + struct timespec t_first_run; // When op first ran + struct timespec t_run; // When op most recently ran + struct timespec t_first_queue; // When op was first queued + struct timespec t_queue; // When op was most recently queued +#endif + time_t epoch_last_run; // Epoch timestamp of when op last ran + time_t epoch_rcchange; // Epoch timestamp of when rc last changed + + bool first_notify_sent; + int last_notify_rc; + int last_notify_op_status; + int last_pid; + + GHashTable *params; +} lrmd_cmd_t; + +static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); +static gboolean execute_resource_action(gpointer user_data); +static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); + +#ifdef PCMK__TIME_USE_CGT + +/*! + * \internal + * \brief Check whether a struct timespec has been set + * + * \param[in] timespec Time to check + * + * \return true if timespec has been set (i.e. is nonzero), false otherwise + */ +static inline bool +time_is_set(const struct timespec *timespec) +{ + return (timespec != NULL) && + ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); +} + +/*! + * \internal + * \brief Set a timespec (and its original if unset) to the current time + * + * \param[out] t_current Where to store current time + * \param[out] t_orig Where to copy t_current if unset + */ +static void +get_current_time(struct timespec *t_current, struct timespec *t_orig) +{ + clock_gettime(CLOCK_MONOTONIC, t_current); + if ((t_orig != NULL) && !time_is_set(t_orig)) { + *t_orig = *t_current; + } +} + +/*! + * \internal + * \brief Return difference between two times in milliseconds + * + * \param[in] now More recent time (or NULL to use current time) + * \param[in] old Earlier time + * + * \return milliseconds difference (or 0 if old is NULL or unset) + * + * \note Can overflow on 32-bit machines when the difference is around + * 24 days or more. + */ +static int +time_diff_ms(const struct timespec *now, const struct timespec *old) +{ + int diff_ms = 0; + + if (time_is_set(old)) { + struct timespec local_now = { 0, }; + + if (now == NULL) { + clock_gettime(CLOCK_MONOTONIC, &local_now); + now = &local_now; + } + diff_ms = (now->tv_sec - old->tv_sec) * 1000 + + (now->tv_nsec - old->tv_nsec) / 1000000; + } + return diff_ms; +} + +/*! + * \internal + * \brief Reset a command's operation times to their original values. + * + * Reset a command's run and queued timestamps to the timestamps of the original + * command, so we report the entire time since then and not just the time since + * the most recent command (for recurring and systemd operations). + * + * \param[in,out] cmd Executor command object to reset + * + * \note It's not obvious what the queued time should be for a systemd + * start/stop operation, which might go like this: + * initial command queued 5ms, runs 3s + * monitor command queued 10ms, runs 10s + * monitor command queued 10ms, runs 10s + * Is the queued time for that operation 5ms, 10ms or 25ms? The current + * implementation will report 5ms. If it's 25ms, then we need to + * subtract 20ms from the total exec time so as not to count it twice. + * We can implement that later if it matters to anyone ... + */ +static void +cmd_original_times(lrmd_cmd_t * cmd) +{ + cmd->t_run = cmd->t_first_run; + cmd->t_queue = cmd->t_first_queue; +} +#endif + +static inline bool +action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms) +{ + return (cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, action, pcmk__str_casei); +} + +/*!
+ * \internal + * \brief Log the result of an asynchronous command + * + * \param[in] cmd Command to log result for + * \param[in] exec_time_ms Execution time in milliseconds, if known + * \param[in] queue_time_ms Queue time in milliseconds, if known + */ +static void +log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) +{ + int log_level = LOG_INFO; + GString *str = g_string_sized_new(100); // reasonable starting size + + if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + log_level = LOG_DEBUG; + } + + g_string_append_printf(str, "%s %s (call %d", + cmd->rsc_id, cmd->action, cmd->call_id); + if (cmd->last_pid != 0) { + g_string_append_printf(str, ", PID %d", cmd->last_pid); + } + if (cmd->result.execution_status == PCMK_EXEC_DONE) { + g_string_append_printf(str, ") exited with status %d", + cmd->result.exit_status); + } else { + pcmk__g_strcat(str, ") could not be executed: ", + pcmk_exec_status_str(cmd->result.execution_status), + NULL); + } + if (cmd->result.exit_reason != NULL) { + pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL); + } + +#ifdef PCMK__TIME_USE_CGT + pcmk__g_strcat(str, " (execution time ", + pcmk__readable_interval(exec_time_ms), NULL); + if (queue_time_ms > 0) { + pcmk__g_strcat(str, " after being queued ", + pcmk__readable_interval(queue_time_ms), NULL); + } + g_string_append_c(str, ')'); +#endif + + do_crm_log(log_level, "%s", str->str); + g_string_free(str, TRUE); +} + +static void +log_execute(lrmd_cmd_t * cmd) +{ + int log_level = LOG_INFO; + + if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + log_level = LOG_DEBUG; + } + + do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", + cmd->rsc_id, cmd->action, cmd->call_id); +} + +static const char * +normalize_action_name(lrmd_rsc_t * rsc, const char *action) +{ + if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && + pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { + return "status"; + } + return action; +} + +static lrmd_rsc_t * +build_rsc_from_xml(xmlNode * msg) +{ + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); + lrmd_rsc_t *rsc = NULL; + + rsc = calloc(1, sizeof(lrmd_rsc_t)); + + crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); + + rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); + rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); + rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); + rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); + rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action, + rsc); + + // Initialize fence device probes (to return "not running") + pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, + PCMK_EXEC_NO_FENCE_DEVICE, NULL); + return rsc; +} + +static lrmd_cmd_t * +create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) +{ + int call_options = 0; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); + lrmd_cmd_t *cmd = NULL; + + cmd = calloc(1, sizeof(lrmd_cmd_t)); + + crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); + cmd->call_opts = call_options; + cmd->client_id = strdup(client->id); + + crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); + crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); + crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); + crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); + cmd->timeout_orig = cmd->timeout; + + cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); + cmd->action = 
crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); + cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); + cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); + + cmd->params = xml2list(rsc_xml); + + if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) { + crm_debug("Setting flag to leave pid group on timeout and " + "only kill action pid for " PCMK__OP_FMT, + cmd->rsc_id, cmd->action, cmd->interval_ms); + cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, + LOG_TRACE, "Action", + cmd->action, 0, + SVC_ACTION_LEAVE_GROUP, + "SVC_ACTION_LEAVE_GROUP"); + } + return cmd; +} + +static void +stop_recurring_timer(lrmd_cmd_t *cmd) +{ + if (cmd) { + if (cmd->stonith_recurring_id) { + g_source_remove(cmd->stonith_recurring_id); + } + cmd->stonith_recurring_id = 0; + } +} + +static void +free_lrmd_cmd(lrmd_cmd_t * cmd) +{ + stop_recurring_timer(cmd); + if (cmd->delay_id) { + g_source_remove(cmd->delay_id); + } + if (cmd->params) { + g_hash_table_destroy(cmd->params); + } + pcmk__reset_result(&(cmd->result)); + free(cmd->origin); + free(cmd->action); + free(cmd->real_action); + free(cmd->userdata_str); + free(cmd->rsc_id); + free(cmd->client_id); + free(cmd); +} + +static gboolean +stonith_recurring_op_helper(gpointer data) +{ + lrmd_cmd_t *cmd = data; + lrmd_rsc_t *rsc; + + cmd->stonith_recurring_id = 0; + + if (!cmd->rsc_id) { + return FALSE; + } + + rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); + + CRM_ASSERT(rsc != NULL); + /* take it out of recurring_ops list, and put it in the pending ops + * to be executed */ + rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); + rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); +#ifdef PCMK__TIME_USE_CGT + get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); +#endif + mainloop_set_trigger(rsc->work); + + return FALSE; +} + +static inline void +start_recurring_timer(lrmd_cmd_t *cmd) +{ + if (cmd && (cmd->interval_ms > 0)) { + cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, + stonith_recurring_op_helper, + cmd); + } +} + +static gboolean +start_delay_helper(gpointer data) +{ + lrmd_cmd_t *cmd = data; + lrmd_rsc_t *rsc = NULL; + + cmd->delay_id = 0; + rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; + + if (rsc) { + mainloop_set_trigger(rsc->work); + } + + return FALSE; +} + +/*! + * \internal + * \brief Check whether a list already contains the equivalent of a given action + * + * \param[in] action_list List to search + * \param[in] cmd Action to search for + */ +static lrmd_cmd_t * +find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd) +{ + for (const GList *item = action_list; item != NULL; item = item->next) { + lrmd_cmd_t *dup = item->data; + + if (action_matches(cmd, dup->action, dup->interval_ms)) { + return dup; + } + } + return NULL; +} + +static bool +merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) +{ + lrmd_cmd_t * dup = NULL; + bool dup_pending = true; + + if (cmd->interval_ms == 0) { + return false; + } + + // Search for a duplicate of this action (in-flight or not) + dup = find_duplicate_action(rsc->pending_ops, cmd); + if (dup == NULL) { + dup_pending = false; + dup = find_duplicate_action(rsc->recurring_ops, cmd); + if (dup == NULL) { + return false; + } + } + + /* Do not merge fencing monitors marked for cancellation, so we can reply to + * the cancellation separately. 
+ */ + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_casei) + && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { + return false; + } + + /* This should not occur. If it does, we need to investigate how something + * like this is possible in the controller. + */ + crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT + "), merging with previous op entry", + rsc->rsc_id, normalize_action_name(rsc, dup->action), + dup->interval_ms); + + // Merge new action's call ID and user data into existing action + dup->first_notify_sent = false; + free(dup->userdata_str); + dup->userdata_str = cmd->userdata_str; + cmd->userdata_str = NULL; + dup->call_id = cmd->call_id; + free_lrmd_cmd(cmd); + cmd = NULL; + + /* If dup is not pending, that means it has already executed at least once + * and is waiting in the interval. In that case, stop waiting and initiate + * a new instance now. + */ + if (!dup_pending) { + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_casei)) { + stop_recurring_timer(dup); + stonith_recurring_op_helper(dup); + } else { + services_action_kick(rsc->rsc_id, + normalize_action_name(rsc, dup->action), + dup->interval_ms); + } + } + return true; +} + +static void +schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) +{ + CRM_CHECK(cmd != NULL, return); + CRM_CHECK(rsc != NULL, return); + + crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); + + if (merge_recurring_duplicate(rsc, cmd)) { + // Equivalent of cmd has already been scheduled + return; + } + + /* The controller expects the executor to automatically cancel + * recurring operations before a resource stops. + */ + if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + cancel_all_recurring(rsc, NULL); + } + + rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); +#ifdef PCMK__TIME_USE_CGT + get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); +#endif + mainloop_set_trigger(rsc->work); + + if (cmd->start_delay) { + cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); + } +} + +static xmlNode * +create_lrmd_reply(const char *origin, int rc, int call_id) +{ + xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); + + crm_xml_add(reply, F_LRMD_ORIGIN, origin); + crm_xml_add_int(reply, F_LRMD_RC, rc); + crm_xml_add_int(reply, F_LRMD_CALLID, call_id); + return reply; +} + +static void +send_client_notify(gpointer key, gpointer value, gpointer user_data) +{ + xmlNode *update_msg = user_data; + pcmk__client_t *client = value; + int rc; + int log_level = LOG_WARNING; + const char *msg = NULL; + + CRM_CHECK(client != NULL, return); + if (client->name == NULL) { + crm_trace("Skipping notification to client without name"); + return; + } + if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { + /* We only want to notify clients of the executor IPC API. If we are + * running as Pacemaker Remote, we may have clients proxied to other + * IPC services in the cluster, so skip those. 
+ */ + crm_trace("Skipping executor API notification to client %s", + pcmk__client_name(client)); + return; + } + + rc = lrmd_server_send_notify(client, update_msg); + if (rc == pcmk_rc_ok) { + return; + } + + switch (rc) { + case ENOTCONN: + case EPIPE: // Client exited without waiting for notification + log_level = LOG_INFO; + msg = "Disconnected"; + break; + + default: + msg = pcmk_rc_str(rc); + break; + } + do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d", + pcmk__client_name(client), msg, rc); +} + +static void +send_cmd_complete_notify(lrmd_cmd_t * cmd) +{ + xmlNode *notify = NULL; + int exec_time = 0; + int queue_time = 0; + +#ifdef PCMK__TIME_USE_CGT + exec_time = time_diff_ms(NULL, &(cmd->t_run)); + queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); +#endif + log_finished(cmd, exec_time, queue_time); + + /* If the originator requested to be notified only for changes in recurring + * operation results, skip the notification if the result hasn't changed. + */ + if (cmd->first_notify_sent + && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only) + && (cmd->last_notify_rc == cmd->result.exit_status) + && (cmd->last_notify_op_status == cmd->result.execution_status)) { + return; + } + + cmd->first_notify_sent = true; + cmd->last_notify_rc = cmd->result.exit_status; + cmd->last_notify_op_status = cmd->result.execution_status; + + notify = create_xml_node(NULL, T_LRMD_NOTIFY); + + crm_xml_add(notify, F_LRMD_ORIGIN, __func__); + crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); + crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); + crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); + crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status); + crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status); + crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); + crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); + + crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, + (long long) cmd->epoch_last_run); + crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, + (long long) cmd->epoch_rcchange); +#ifdef PCMK__TIME_USE_CGT + crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); + crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); +#endif + + crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); + crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); + if(cmd->real_action) { + crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); + } else { + crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); + } + crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); + crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); + + if (cmd->result.action_stderr != NULL) { + crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr); + + } else if (cmd->result.action_stdout != NULL) { + crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout); + } + + if (cmd->params) { + char *key = NULL; + char *value = NULL; + GHashTableIter iter; + + xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); + + g_hash_table_iter_init(&iter, cmd->params); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { + hash2smartfield((gpointer) key, (gpointer) value, args); + } + } + if ((cmd->client_id != NULL) + && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) { + + pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); + + if (client != NULL) { + send_client_notify(client->id, client, notify); + } + } else { + 
pcmk__foreach_ipc_client(send_client_notify, notify); + } + + free_xml(notify); +} + +static void +send_generic_notify(int rc, xmlNode * request) +{ + if (pcmk__ipc_client_count() != 0) { + int call_id = 0; + xmlNode *notify = NULL; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); + const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + const char *op = crm_element_value(request, F_LRMD_OPERATION); + + crm_element_value_int(request, F_LRMD_CALLID, &call_id); + + notify = create_xml_node(NULL, T_LRMD_NOTIFY); + crm_xml_add(notify, F_LRMD_ORIGIN, __func__); + crm_xml_add_int(notify, F_LRMD_RC, rc); + crm_xml_add_int(notify, F_LRMD_CALLID, call_id); + crm_xml_add(notify, F_LRMD_OPERATION, op); + crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); + + pcmk__foreach_ipc_client(send_client_notify, notify); + + free_xml(notify); + } +} + +static void +cmd_reset(lrmd_cmd_t * cmd) +{ + cmd->last_pid = 0; +#ifdef PCMK__TIME_USE_CGT + memset(&cmd->t_run, 0, sizeof(cmd->t_run)); + memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); +#endif + cmd->epoch_last_run = 0; + + pcmk__reset_result(&(cmd->result)); + cmd->result.execution_status = PCMK_EXEC_DONE; +} + +static void +cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) +{ + crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, + rsc ? rsc->active : NULL, cmd); + + if (rsc && (rsc->active == cmd)) { + rsc->active = NULL; + mainloop_set_trigger(rsc->work); + } + + if (!rsc) { + cmd->rsc_deleted = 1; + } + + /* reset original timeout so client notification has correct information */ + cmd->timeout = cmd->timeout_orig; + + send_cmd_complete_notify(cmd); + + if ((cmd->interval_ms != 0) + && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { + + if (rsc) { + rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); + rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); + } + free_lrmd_cmd(cmd); + } else if (cmd->interval_ms == 0) { + if (rsc) { + rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); + } + free_lrmd_cmd(cmd); + } else { + /* Clear all the values pertaining just to the last iteration of a recurring op. 
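+ * cmd_reset() (defined above) zeroes the per-run state -- last_pid, the + * monotonic and epoch run timestamps, and the result -- while call_id, + * params, and the interval are kept, so the recurring operation itself + * stays active.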
*/ + cmd_reset(cmd); + } +} + +struct notify_new_client_data { + xmlNode *notify; + pcmk__client_t *new_client; +}; + +static void +notify_one_client(gpointer key, gpointer value, gpointer user_data) +{ + pcmk__client_t *client = value; + struct notify_new_client_data *data = user_data; + + if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { + send_client_notify(key, (gpointer) client, (gpointer) data->notify); + } +} + +void +notify_of_new_client(pcmk__client_t *new_client) +{ + struct notify_new_client_data data; + + data.new_client = new_client; + data.notify = create_xml_node(NULL, T_LRMD_NOTIFY); + crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__); + crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); + pcmk__foreach_ipc_client(notify_one_client, &data); + free_xml(data.notify); +} + +void +client_disconnect_cleanup(const char *client_id) +{ + GHashTableIter iter; + lrmd_rsc_t *rsc = NULL; + char *key = NULL; + + g_hash_table_iter_init(&iter, rsc_list); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { + if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) { + /* This client is disconnecting, drop any recurring operations + * it may have initiated on the resource */ + cancel_all_recurring(rsc, client_id); + } + } +} + +static void +action_complete(svc_action_t * action) +{ + lrmd_rsc_t *rsc; + lrmd_cmd_t *cmd = action->cb_data; + enum ocf_exitcode code; + +#ifdef PCMK__TIME_USE_CGT + const char *rclass = NULL; + bool goagain = false; +#endif + + if (!cmd) { + crm_err("Completed executor action (%s) does not match any known operations", + action->id); + return; + } + +#ifdef PCMK__TIME_USE_CGT + if (cmd->result.exit_status != action->rc) { + cmd->epoch_rcchange = time(NULL); + } +#endif + + cmd->last_pid = action->pid; + + // Cast variable instead of function return to keep compilers happy + code = services_result2ocf(action->standard, cmd->action, action->rc); + pcmk__set_result(&(cmd->result), (int) code, + action->status, services__exit_reason(action)); + + rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; + +#ifdef PCMK__TIME_USE_CGT + if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { + rclass = resources_find_service_class(rsc->type); + } else if(rsc) { + rclass = rsc->class; + } + + if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { + if (pcmk__result_ok(&(cmd->result)) + && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { + /* systemd returns from start and stop actions after the action + * begins, not after it completes. We have to jump through a few + * hoops so that we don't report 'complete' to the rest of pacemaker + * until it's actually done. 
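+ * + * For illustration, a successful systemd start proceeds roughly like this + * (timings hypothetical): + * + * start -> rc=OK, but systemd has only queued the job + * monitor -> PCMK_EXEC_PENDING while the unit is still activating: retry + * monitor -> unit active: only now report the original start as complete + * + * A stop is the mirror image: the follow-up monitor is retried for as long + * as it still reports the unit as running.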
+ */ + goagain = true; + cmd->real_action = cmd->action; + cmd->action = strdup("monitor"); + + } else if (cmd->real_action != NULL) { + // This is follow-up monitor to check whether start/stop completed + if (cmd->result.execution_status == PCMK_EXEC_PENDING) { + goagain = true; + + } else if (pcmk__result_ok(&(cmd->result)) + && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { + goagain = true; + + } else { + int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); + int timeout_left = cmd->timeout_orig - time_sum; + + crm_debug("%s systemd %s is now complete (elapsed=%dms, " + "remaining=%dms): %s (%d)", + cmd->rsc_id, cmd->real_action, time_sum, timeout_left, + services_ocf_exitcode_str(cmd->result.exit_status), + cmd->result.exit_status); + cmd_original_times(cmd); + + // Monitors may return "not running", but start/stop shouldn't + if ((cmd->result.execution_status == PCMK_EXEC_DONE) + && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { + + if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { + cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; + } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { + cmd->result.exit_status = PCMK_OCF_OK; + } + } + } + } + } +#endif + +#if SUPPORT_NAGIOS + if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { + if (action_matches(cmd, "monitor", 0) + && pcmk__result_ok(&(cmd->result))) { + /* Successfully executed --version for the nagios plugin */ + cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; + + } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) + && !pcmk__result_ok(&(cmd->result))) { +#ifdef PCMK__TIME_USE_CGT + goagain = true; +#endif + } + } +#endif + +#ifdef PCMK__TIME_USE_CGT + if (goagain) { + int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); + int timeout_left = cmd->timeout_orig - time_sum; + int delay = cmd->timeout_orig / 10; + + if(delay >= timeout_left && timeout_left > 20) { + delay = timeout_left/2; + } + + delay = QB_MIN(2000, delay); + if (delay < timeout_left) { + cmd->start_delay = delay; + cmd->timeout = timeout_left; + + if (pcmk__result_ok(&(cmd->result))) { + crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); + + } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { + crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); + + } else { + crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->action, + services_ocf_exitcode_str(cmd->result.exit_status), + cmd->result.exit_status, time_sum, timeout_left, + delay); + } + + cmd_reset(cmd); + if(rsc) { + rsc->active = NULL; + } + schedule_lrmd_cmd(rsc, cmd); + + /* Don't finalize cmd, we're not done with it yet */ + return; + + } else { + crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", + cmd->rsc_id, + (cmd->real_action? 
cmd->real_action : cmd->action), + cmd->result.exit_status, time_sum, timeout_left); + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_TIMEOUT, + "Investigate reason for timeout, and adjust " + "configured operation timeout if necessary"); + cmd_original_times(cmd); + } + } +#endif + + pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), + services__grab_stderr(action)); + cmd_finalize(cmd, rsc); +} + +/*! + * \internal + * \brief Process the result of a fence device action (start, stop, or monitor) + * + * \param[in,out] cmd Fence device action that completed + * \param[in] exit_status Fencer API exit status for action + * \param[in] execution_status Fencer API execution status for action + * \param[in] exit_reason Human-friendly detail, if action failed + */ +static void +stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, + enum pcmk_exec_status execution_status, + const char *exit_reason) +{ + // This can be NULL if resource was removed before command completed + lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); + + // Simplify fencer exit status to uniform exit status + if (exit_status != CRM_EX_OK) { + exit_status = PCMK_OCF_UNKNOWN_ERROR; + } + + if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) { + /* An in-flight fence action was cancelled. The execution status is + * already correct, so don't overwrite it. + */ + execution_status = PCMK_EXEC_CANCELLED; + + } else { + /* Some execution status codes have specific meanings for the fencer + * that executor clients may not expect, so map them to a simple error + * status. + */ + switch (execution_status) { + case PCMK_EXEC_NOT_CONNECTED: + case PCMK_EXEC_INVALID: + execution_status = PCMK_EXEC_ERROR; + break; + + case PCMK_EXEC_NO_FENCE_DEVICE: + /* This should be possible only for probes in practice, but + * interpret for all actions to be safe. + */ + if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS, + pcmk__str_none)) { + exit_status = PCMK_OCF_NOT_RUNNING; + + } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP, + pcmk__str_none)) { + exit_status = PCMK_OCF_OK; + + } else { + exit_status = PCMK_OCF_NOT_INSTALLED; + } + execution_status = PCMK_EXEC_ERROR; + break; + + case PCMK_EXEC_NOT_SUPPORTED: + exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE; + break; + + default: + break; + } + } + + pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason); + + // Certain successful actions change the known state of the resource + if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { + + if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK, + PCMK_EXEC_DONE, NULL); // "running" + + } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, + PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running" + } + } + + /* The recurring timer should not be running at this point in any case, but + * as a failsafe, stop it if it is. + */ + stop_recurring_timer(cmd); + + /* Reschedule this command if appropriate. If a recurring command is *not* + * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will + * not be removed from recurring_ops by cmd_finalize(). 
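+ * + * For example, a recurring fence-device monitor that just completed is + * rearmed via start_recurring_timer() below and then merely reset by + * cmd_finalize(), while a cancelled one skips the timer so that + * cmd_finalize() drops it from recurring_ops and frees it.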
+ */ + if (rsc && (cmd->interval_ms > 0) + && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { + start_recurring_timer(cmd); + } + + cmd_finalize(cmd, rsc); +} + +static void +lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) +{ + if ((data == NULL) || (data->userdata == NULL)) { + crm_err("Ignoring fence action result: " + "Invalid callback arguments (bug?)"); + } else { + stonith_action_complete((lrmd_cmd_t *) data->userdata, + stonith__exit_status(data), + stonith__execution_status(data), + stonith__exit_reason(data)); + } +} + +void +stonith_connection_failed(void) +{ + GHashTableIter iter; + lrmd_rsc_t *rsc = NULL; + + crm_warn("Connection to fencer lost (any pending operations for " + "fence devices will be considered failed)"); + + g_hash_table_iter_init(&iter, rsc_list); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) { + if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_none)) { + continue; + } + + /* If we registered this fence device, we don't know whether the + * fencer still has the registration or not. Cause future probes to + * return an error until the resource is stopped or started + * successfully. This is especially important if the controller also + * went away (possibly due to a cluster layer restart) and won't + * receive our client notification of any monitors finalized below. + */ + if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) { + pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, + PCMK_EXEC_NOT_CONNECTED, + "Lost connection to fencer"); + } + + // Consider any active, pending, or recurring operations as failed + + for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) { + lrmd_cmd_t *cmd = op->data; + + /* This won't free a recurring op but instead restart its timer. + * If cmd is rsc->active, this will set rsc->active to NULL, so we + * don't have to worry about finalizing it a second time below. + */ + stonith_action_complete(cmd, + CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, + "Lost connection to fencer"); + } + + if (rsc->active != NULL) { + rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active); + } + while (rsc->pending_ops != NULL) { + // This will free the op and remove it from rsc->pending_ops + stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data, + CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, + "Lost connection to fencer"); + } + } +} + +/*! + * \internal + * \brief Execute a stonith resource "start" action + * + * Start a stonith resource by registering it with the fencer. + * (Stonith agents don't have a start command.) 
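+ * + * For illustration (device parameters hypothetical): a start whose params + * table contains {"pcmk_host_list": "node1 node2"} has each entry converted + * with stonith_key_value_add() and is then registered via the fencer's + * register_device() method below.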
+ * + * \param[in,out] stonith_api Connection to fencer + * \param[in] rsc Stonith resource to start + * \param[in] cmd Start command to execute + * + * \return pcmk_ok on success, -errno otherwise + */ +static int +execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc, + const lrmd_cmd_t *cmd) +{ + char *key = NULL; + char *value = NULL; + stonith_key_value_t *device_params = NULL; + int rc = pcmk_ok; + + // Convert command parameters to stonith API key/values + if (cmd->params) { + GHashTableIter iter; + + g_hash_table_iter_init(&iter, cmd->params); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { + device_params = stonith_key_value_add(device_params, key, value); + } + } + + /* The fencer will automatically register devices via CIB notifications + * when the CIB changes, but to avoid a possible race condition between + * the fencer receiving the notification and the executor requesting that + * resource, the executor registers the device as well. The fencer knows how + * to handle duplicate registrations. + */ + rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, + cmd->rsc_id, rsc->provider, + rsc->type, device_params); + + stonith_key_value_freeall(device_params, 1, 1); + return rc; +} + +/*! + * \internal + * \brief Execute a stonith resource "stop" action + * + * Stop a stonith resource by unregistering it with the fencer. + * (Stonith agents don't have a stop command.) + * + * \param[in,out] stonith_api Connection to fencer + * \param[in] rsc Stonith resource to stop + * + * \return pcmk_ok on success, -errno otherwise + */ +static inline int +execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) +{ + /* @TODO Failure would indicate a problem communicating with fencer; + * perhaps we should try reconnecting and retrying a few times? + */ + return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, + rsc->rsc_id); +} + +/*! 
+ * \internal + * \brief Initiate a stonith resource agent recurring "monitor" action + * + * \param[in,out] stonith_api Connection to fencer + * \param[in,out] rsc Stonith resource to monitor + * \param[in] cmd Monitor command being executed + * + * \return pcmk_ok if monitor was successfully initiated, -errno otherwise + */ +static inline int +execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) +{ + int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, + cmd->timeout / 1000); + + rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, + "lrmd_stonith_callback", + lrmd_stonith_callback); + if (rc == TRUE) { + rsc->active = cmd; + rc = pcmk_ok; + } else { + rc = -pcmk_err_generic; + } + return rc; +} + +static void +execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) +{ + int rc = 0; + bool do_monitor = FALSE; + + stonith_t *stonith_api = get_stonith_connection(); + + if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) + && (cmd->interval_ms == 0)) { + // Probes don't require a fencer connection + stonith_action_complete(cmd, rsc->fence_probe_result.exit_status, + rsc->fence_probe_result.execution_status, + rsc->fence_probe_result.exit_reason); + return; + + } else if (stonith_api == NULL) { + stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_NOT_CONNECTED, + "No connection to fencer"); + return; + + } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + rc = execd_stonith_start(stonith_api, rsc, cmd); + if (rc == pcmk_ok) { + do_monitor = TRUE; + } + + } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + rc = execd_stonith_stop(stonith_api, rsc); + + } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + do_monitor = TRUE; + + } else { + stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE, + PCMK_EXEC_ERROR, + "Invalid fence device action (bug?)"); + return; + } + + if (do_monitor) { + rc = execd_stonith_monitor(stonith_api, rsc, cmd); + if (rc == pcmk_ok) { + // Don't clean up yet, we will find out result of the monitor later + return; + } + } + + stonith_action_complete(cmd, + ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), + stonith__legacy2status(rc), + ((rc == -pcmk_err_generic)? 
NULL : pcmk_strerror(rc))); +} + +static void +execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) +{ + svc_action_t *action = NULL; + GHashTable *params_copy = NULL; + + CRM_ASSERT(rsc); + CRM_ASSERT(cmd); + + crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", + rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); + +#if SUPPORT_NAGIOS + /* Recurring operations are cancelled anyway for a stop operation */ + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) + && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + + cmd->result.exit_status = PCMK_OCF_OK; + cmd_finalize(cmd, rsc); + return; + } +#endif + + params_copy = pcmk__str_table_dup(cmd->params); + + action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, + rsc->type, + normalize_action_name(rsc, cmd->action), + cmd->interval_ms, cmd->timeout, + params_copy, cmd->service_flags); + + if (action == NULL) { + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR, strerror(ENOMEM)); + cmd_finalize(cmd, rsc); + return; + } + + if (action->rc != PCMK_OCF_UNKNOWN) { + pcmk__set_result(&(cmd->result), action->rc, action->status, + services__exit_reason(action)); + services_action_free(action); + cmd_finalize(cmd, rsc); + return; + } + + action->cb_data = cmd; + + if (services_action_async(action, action_complete)) { + /* The services library has taken responsibility for the action. It + * could be pending, blocked, or merged into a duplicate recurring + * action, in which case the action callback (action_complete()) + * will be called when the action completes, otherwise the callback has + * already been called. + * + * action_complete() calls cmd_finalize() which can free cmd, so cmd + * cannot be used here. + */ + } else { + /* This is a recurring action that is not being cancelled and could not + * be initiated. It has been rescheduled, and the action callback + * (action_complete()) has been called, which in this case has already + * called cmd_finalize(), which in this case should only reset (not + * free) cmd. 
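Because cmd was reset rather than freed, it is still safe to + * dereference here to record this action's result.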
+ */ + + pcmk__set_result(&(cmd->result), action->rc, action->status, + services__exit_reason(action)); + services_action_free(action); + } +} + +static gboolean +execute_resource_action(gpointer user_data) +{ + lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data; + lrmd_cmd_t *cmd = NULL; + + CRM_CHECK(rsc != NULL, return FALSE); + + if (rsc->active) { + crm_trace("%s is still active", rsc->rsc_id); + return TRUE; + } + + if (rsc->pending_ops) { + GList *first = rsc->pending_ops; + + cmd = first->data; + if (cmd->delay_id) { + crm_trace + ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", + cmd->rsc_id, cmd->action, cmd->start_delay); + return TRUE; + } + rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); + g_list_free_1(first); + +#ifdef PCMK__TIME_USE_CGT + get_current_time(&(cmd->t_run), &(cmd->t_first_run)); +#endif + cmd->epoch_last_run = time(NULL); + } + + if (!cmd) { + crm_trace("Nothing further to do for %s", rsc->rsc_id); + return TRUE; + } + + rsc->active = cmd; /* only one op at a time for a rsc */ + if (cmd->interval_ms) { + rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); + } + + log_execute(cmd); + + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + execute_stonith_action(rsc, cmd); + } else { + execute_nonstonith_action(rsc, cmd); + } + + return TRUE; +} + +void +free_rsc(gpointer data) +{ + GList *gIter = NULL; + lrmd_rsc_t *rsc = data; + int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_casei); + + gIter = rsc->pending_ops; + while (gIter != NULL) { + GList *next = gIter->next; + lrmd_cmd_t *cmd = gIter->data; + + /* command was never executed */ + cmd->result.execution_status = PCMK_EXEC_CANCELLED; + cmd_finalize(cmd, NULL); + + gIter = next; + } + /* frees list, but not list elements. */ + g_list_free(rsc->pending_ops); + + gIter = rsc->recurring_ops; + while (gIter != NULL) { + GList *next = gIter->next; + lrmd_cmd_t *cmd = gIter->data; + + if (is_stonith) { + cmd->result.execution_status = PCMK_EXEC_CANCELLED; + /* If a stonith command is in-flight, just mark it as cancelled; + * it is not safe to finalize/free the cmd until the stonith api + * says it has either completed or timed out. + */ + if (rsc->active != cmd) { + cmd_finalize(cmd, NULL); + } + } else { + /* This command is already handed off to service library, + * let service library cancel it and tell us via the callback + * when it is cancelled. The rsc can be safely destroyed + * even if we are waiting for the cancel result */ + services_action_cancel(rsc->rsc_id, + normalize_action_name(rsc, cmd->action), + cmd->interval_ms); + } + + gIter = next; + } + /* frees list, but not list elements. 
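The lrmd_cmd_t entries themselves were either finalized above, remain + * owned by the stonith API (if still in flight), or will be released by the + * services library's cancellation callback.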
*/ + g_list_free(rsc->recurring_ops); + + free(rsc->rsc_id); + free(rsc->class); + free(rsc->provider); + free(rsc->type); + mainloop_destroy_trigger(rsc->work); + + free(rsc); +} + +static int +process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + xmlNode **reply) +{ + int rc = pcmk_ok; + time_t now = time(NULL); + const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); + + if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { + crm_err("Cluster API version must be greater than or equal to %s, not %s", + LRMD_MIN_PROTOCOL_VERSION, protocol_version); + rc = -EPROTO; + } + + if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) { +#ifdef PCMK__COMPILE_REMOTE + if ((client->remote != NULL) + && pcmk_is_set(client->flags, + pcmk__client_tls_handshake_complete)) { + + // This is a remote connection from a cluster node's controller + ipc_proxy_add_provider(client); + } else { + rc = -EACCES; + } +#else + rc = -EPROTONOSUPPORT; +#endif + } + + *reply = create_lrmd_reply(__func__, rc, call_id); + crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER); + crm_xml_add(*reply, F_LRMD_CLIENTID, client->id); + crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); + crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time); + + return rc; +} + +static int +process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) +{ + int rc = pcmk_ok; + lrmd_rsc_t *rsc = build_rsc_from_xml(request); + lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); + + if (dup && + pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && + pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { + + crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); + free_rsc(rsc); + return rc; + } + + g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); + crm_info("Cached agent information for '%s'", rsc->rsc_id); + return rc; +} + +static xmlNode * +process_lrmd_get_rsc_info(xmlNode *request, int call_id) +{ + int rc = pcmk_ok; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); + const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + xmlNode *reply = NULL; + lrmd_rsc_t *rsc = NULL; + + if (rsc_id == NULL) { + rc = -ENODEV; + } else { + rsc = g_hash_table_lookup(rsc_list, rsc_id); + if (rsc == NULL) { + crm_info("Agent information for '%s' not in cache", rsc_id); + rc = -ENODEV; + } + } + + reply = create_lrmd_reply(__func__, rc, call_id); + if (rsc) { + crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); + crm_xml_add(reply, F_LRMD_CLASS, rsc->class); + crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); + crm_xml_add(reply, F_LRMD_TYPE, rsc->type); + } + return reply; +} + +static int +process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, + xmlNode *request) +{ + int rc = pcmk_ok; + lrmd_rsc_t *rsc = NULL; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); + const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + + if (!rsc_id) { + return -ENODEV; + } + + rsc = g_hash_table_lookup(rsc_list, rsc_id); + if (rsc == NULL) { + crm_info("Ignoring unregistration of resource '%s', which is not registered", + rsc_id); + return pcmk_ok; + } + + if (rsc->active) { + /* let the caller know there are still active ops on this rsc to watch for */ + crm_trace("Operation (%p) still in progress for unregistered resource %s", + rsc->active, rsc_id); + rc = -EINPROGRESS; + } + + 
g_hash_table_remove(rsc_list, rsc_id); + + return rc; +} + +static int +process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) +{ + lrmd_rsc_t *rsc = NULL; + lrmd_cmd_t *cmd = NULL; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); + const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + int call_id; + + if (!rsc_id) { + return -EINVAL; + } + if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { + crm_info("Resource '%s' not found (%d active resources)", + rsc_id, g_hash_table_size(rsc_list)); + return -ENODEV; + } + + cmd = create_lrmd_cmd(request, client); + call_id = cmd->call_id; + + /* Don't reference cmd after handing it off to be scheduled. + * The cmd could get merged and freed. */ + schedule_lrmd_cmd(rsc, cmd); + + return call_id; +} + +static int +cancel_op(const char *rsc_id, const char *action, guint interval_ms) +{ + GList *gIter = NULL; + lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); + + /* How to cancel an action. + * 1. Check pending ops list, if it hasn't been handed off + * to the service library or stonith recurring list remove + * it there and that will stop it. + * 2. If it isn't in the pending ops list, then it's either a + * recurring op in the stonith recurring list, or the service + * library's recurring list. Stop it there + * 3. If not found in any lists, then this operation has either + * been executed already and is not a recurring operation, or + * never existed. + */ + if (!rsc) { + return -ENODEV; + } + + for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { + lrmd_cmd_t *cmd = gIter->data; + + if (action_matches(cmd, action, interval_ms)) { + cmd->result.execution_status = PCMK_EXEC_CANCELLED; + cmd_finalize(cmd, rsc); + return pcmk_ok; + } + } + + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + /* The service library does not handle stonith operations. + * We have to handle recurring stonith operations ourselves. */ + for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { + lrmd_cmd_t *cmd = gIter->data; + + if (action_matches(cmd, action, interval_ms)) { + cmd->result.execution_status = PCMK_EXEC_CANCELLED; + if (rsc->active != cmd) { + cmd_finalize(cmd, rsc); + } + return pcmk_ok; + } + } + } else if (services_action_cancel(rsc_id, + normalize_action_name(rsc, action), + interval_ms) == TRUE) { + /* The service library will tell the action_complete callback function + * this action was cancelled, which will destroy the cmd and remove + * it from the recurring_op list. Do not do that in this function + * if the service library says it cancelled it. */ + return pcmk_ok; + } + + return -EOPNOTSUPP; +} + +static void +cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) +{ + GList *cmd_list = NULL; + GList *cmd_iter = NULL; + + /* Notice a copy of each list is created when concat is called. + * This prevents odd behavior from occurring when the cmd_list + * is iterated through later on. It is possible the cancel_op + * function may end up modifying the recurring_ops and pending_ops + * lists. 
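(For example, cancel_op() can trigger cmd_finalize(), which removes the + * command from rsc->recurring_ops via g_list_remove() and would invalidate + * a live iterator over the original list.)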
If we did not copy those lists, our cmd_list iteration + * could get messed up.*/ + if (rsc->recurring_ops) { + cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); + } + if (rsc->pending_ops) { + cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); + } + if (!cmd_list) { + return; + } + + for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { + lrmd_cmd_t *cmd = cmd_iter->data; + + if (cmd->interval_ms == 0) { + continue; + } + + if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { + continue; + } + + cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); + } + /* frees only the copied list data, not the cmds */ + g_list_free(cmd_list); +} + +static int +process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) +{ + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); + const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); + guint interval_ms = 0; + + crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); + + if (!rsc_id || !action) { + return -EINVAL; + } + + return cancel_op(rsc_id, action, interval_ms); +} + +static void +add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) +{ + xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); + + crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); + for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { + lrmd_cmd_t *cmd = item->data; + xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); + + crm_xml_add(op_xml, F_LRMD_RSC_ACTION, + (cmd->real_action? cmd->real_action : cmd->action)); + crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); + crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); + } +} + +static xmlNode * +process_lrmd_get_recurring(xmlNode *request, int call_id) +{ + int rc = pcmk_ok; + const char *rsc_id = NULL; + lrmd_rsc_t *rsc = NULL; + xmlNode *reply = NULL; + xmlNode *rsc_xml = NULL; + + // Resource ID is optional + rsc_xml = first_named_child(request, F_LRMD_CALLDATA); + if (rsc_xml) { + rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); + } + if (rsc_xml) { + rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + } + + // If resource ID is specified, resource must exist + if (rsc_id != NULL) { + rsc = g_hash_table_lookup(rsc_list, rsc_id); + if (rsc == NULL) { + crm_info("Resource '%s' not found (%d active resources)", + rsc_id, g_hash_table_size(rsc_list)); + rc = -ENODEV; + } + } + + reply = create_lrmd_reply(__func__, rc, call_id); + + // If resource ID is not specified, check all resources + if (rsc_id == NULL) { + GHashTableIter iter; + char *key = NULL; + + g_hash_table_iter_init(&iter, rsc_list); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, + (gpointer *) &rsc)) { + add_recurring_op_xml(reply, rsc); + } + } else if (rsc) { + add_recurring_op_xml(reply, rsc); + } + return reply; +} + +void +process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) +{ + int rc = pcmk_ok; + int call_id = 0; + const char *op = crm_element_value(request, F_LRMD_OPERATION); + int do_reply = 0; + int do_notify = 0; + xmlNode *reply = NULL; + + /* Certain IPC commands may be done only by privileged users (i.e. root or + * hacluster), because they would otherwise provide a means of bypassing + * ACLs. 
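For example, LRMD_OP_RSC_EXEC ultimately executes resource agents with the + * executor's own (root) privileges, so an unprivileged client permitted to + * issue it directly could sidestep CIB-level access control.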
+ */ + bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); + + crm_trace("Processing %s operation from %s", op, client->id); + crm_element_value_int(request, F_LRMD_CALLID, &call_id); + + if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { +#ifdef PCMK__COMPILE_REMOTE + if (allowed) { + ipc_proxy_forward_client(client, request); + } else { + rc = -EACCES; + } +#else + rc = -EPROTONOSUPPORT; +#endif + do_reply = 1; + } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { + rc = process_lrmd_signon(client, request, call_id, &reply); + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { + if (allowed) { + rc = process_lrmd_rsc_register(client, id, request); + do_notify = 1; + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { + if (allowed) { + reply = process_lrmd_get_rsc_info(request, call_id); + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { + if (allowed) { + rc = process_lrmd_rsc_unregister(client, id, request); + /* don't notify anyone about failed un-registers */ + if (rc == pcmk_ok || rc == -EINPROGRESS) { + do_notify = 1; + } + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { + if (allowed) { + rc = process_lrmd_rsc_exec(client, id, request); + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { + if (allowed) { + rc = process_lrmd_rsc_cancel(client, id, request); + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { + do_notify = 1; + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { + if (allowed) { + xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); + + CRM_LOG_ASSERT(data != NULL); + pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG)); + } else { + rc = -EACCES; + } + } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { + if (allowed) { + rc = process_lrmd_alert_exec(client, id, request); + } else { + rc = -EACCES; + } + do_reply = 1; + } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { + if (allowed) { + reply = process_lrmd_get_recurring(request, call_id); + } else { + rc = -EACCES; + } + do_reply = 1; + } else { + rc = -EOPNOTSUPP; + do_reply = 1; + crm_err("Unknown IPC request '%s' from client %s", + op, pcmk__client_name(client)); + } + + if (rc == -EACCES) { + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + op, pcmk__client_name(client)); + } + + crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", + op, client->id, rc, do_reply, do_notify); + + if (do_reply) { + int send_rc = pcmk_rc_ok; + + if (reply == NULL) { + reply = create_lrmd_reply(__func__, rc, call_id); + } + send_rc = lrmd_server_send_reply(client, id, reply); + free_xml(reply); + if (send_rc != pcmk_rc_ok) { + crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d", + pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); + } + } + + if (do_notify) { + send_generic_notify(rc, request); + } +} diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c new file mode 100644 index 0000000..83a8cd7 --- /dev/null +++ b/daemons/execd/pacemaker-execd.c @@ -0,0 +1,582 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> +#include <signal.h> +#include <sys/types.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/services.h> +#include <crm/common/cmdline_internal.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_internal.h> +#include <crm/common/mainloop.h> +#include <crm/common/output_internal.h> +#include <crm/common/remote_internal.h> +#include <crm/lrmd_internal.h> + +#include "pacemaker-execd.h" + +#ifdef PCMK__COMPILE_REMOTE +# define EXECD_TYPE "remote" +# define EXECD_NAME "pacemaker-remoted" +# define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" +#else +# define EXECD_TYPE "local" +# define EXECD_NAME "pacemaker-execd" +# define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" +#endif + +static GMainLoop *mainloop = NULL; +static qb_ipcs_service_t *ipcs = NULL; +static stonith_t *stonith_api = NULL; +int lrmd_call_id = 0; +time_t start_time; + +static struct { + gchar **log_files; +#ifdef PCMK__COMPILE_REMOTE + gchar *port; +#endif // PCMK__COMPILE_REMOTE +} options; + +#ifdef PCMK__COMPILE_REMOTE +/* whether shutdown request has been sent */ +static gboolean shutting_down = FALSE; + +/* timer for waiting for acknowledgment of shutdown request */ +static guint shutdown_ack_timer = 0; + +static gboolean lrmd_exit(gpointer data); +#endif + +static void +stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) +{ + stonith_api->state = stonith_disconnected; + stonith_connection_failed(); +} + +stonith_t * +get_stonith_connection(void) +{ + if (stonith_api && stonith_api->state == stonith_disconnected) { + stonith_api_delete(stonith_api); + stonith_api = NULL; + } + + if (stonith_api == NULL) { + int rc = pcmk_ok; + + stonith_api = stonith_api_new(); + if (stonith_api == NULL) { + crm_err("Could not connect to fencer: API memory allocation failed"); + return NULL; + } + rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); + if (rc != pcmk_ok) { + crm_err("Could not connect to fencer in 10 attempts: %s " + CRM_XS " rc=%d", pcmk_strerror(rc), rc); + stonith_api_delete(stonith_api); + stonith_api = NULL; + } else { + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_DISCONNECT, + stonith_connection_destroy_cb); + } + } + return stonith_api; +} + +static int32_t +lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + crm_trace("Connection %p", c); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return 0; +} + +static void +lrmd_ipc_created(qb_ipcs_connection_t * c) +{ + pcmk__client_t *new_client = pcmk__find_client(c); + + crm_trace("Connection %p", c); + CRM_ASSERT(new_client != NULL); + /* Now that the connection is officially established, alert + * the other clients a new connection exists.
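+ * (The notification is the LRMD_OP_NEW_CLIENT message built and broadcast + * by notify_of_new_client().)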
*/ + + notify_of_new_client(new_client); +} + +static int32_t +lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *client = pcmk__find_client(c); + xmlNode *request = NULL; + + CRM_CHECK(client != NULL, crm_err("Invalid client"); + return FALSE); + CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); + return FALSE); + + // Validate the client before using it to parse the message + request = pcmk__client_data2xml(client, data, &id, &flags); + + CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); + return FALSE); + + if (!request) { + return 0; + } + + if (!client->name) { + const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); + + if (value == NULL) { + client->name = pcmk__itoa(pcmk__client_pid(c)); + } else { + client->name = strdup(value); + } + } + + lrmd_call_id++; + if (lrmd_call_id < 1) { + lrmd_call_id = 1; + } + + crm_xml_add(request, F_LRMD_CLIENTID, client->id); + crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); + crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); + + process_lrmd_message(client, id, request); + + free_xml(request); + return 0; +} + +/*! + * \internal + * \brief Free a client connection, and exit if appropriate + * + * \param[in,out] client Client connection to free + */ +void +lrmd_client_destroy(pcmk__client_t *client) +{ + pcmk__free_client(client); + +#ifdef PCMK__COMPILE_REMOTE + /* If we were waiting to shut down, we can now safely do so + * if there are no more proxied IPC providers + */ + if (shutting_down && (ipc_proxy_get_provider() == NULL)) { + lrmd_exit(NULL); + } +#endif +} + +static int32_t +lrmd_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + + crm_trace("Connection %p", c); + client_disconnect_cleanup(client->id); +#ifdef PCMK__COMPILE_REMOTE + ipc_proxy_remove_provider(client); +#endif + lrmd_client_destroy(client); + return 0; +} + +static void +lrmd_ipc_destroy(qb_ipcs_connection_t * c) +{ + lrmd_ipc_closed(c); + crm_trace("Connection %p", c); +} + +static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { + .connection_accept = lrmd_ipc_accept, + .connection_created = lrmd_ipc_created, + .msg_process = lrmd_ipc_dispatch, + .connection_closed = lrmd_ipc_closed, + .connection_destroyed = lrmd_ipc_destroy +}; + +// \return Standard Pacemaker return code +int +lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) +{ + crm_trace("Sending reply (%d) to client (%s)", id, client->id); + switch (PCMK__CLIENT_TYPE(client)) { + case pcmk__client_ipc: + return pcmk__ipc_send_xml(client, id, reply, FALSE); +#ifdef PCMK__COMPILE_REMOTE + case pcmk__client_tls: + return lrmd__remote_send_xml(client->remote, reply, id, "reply"); +#endif + default: + crm_err("Could not send reply: unknown type for client %s " + CRM_XS " flags=%#llx", + pcmk__client_name(client), client->flags); + } + return ENOTCONN; +} + +// \return Standard Pacemaker return code +int +lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) +{ + crm_trace("Sending notification to client (%s)", client->id); + switch (PCMK__CLIENT_TYPE(client)) { + case pcmk__client_ipc: + if (client->ipcs == NULL) { + crm_trace("Could not notify local client: disconnected"); + return ENOTCONN; + } + return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); +#ifdef PCMK__COMPILE_REMOTE + case pcmk__client_tls: + if (client->remote == NULL) { + crm_trace("Could not notify remote client: disconnected"); + return
ENOTCONN; + } else { + return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); + } +#endif + default: + crm_err("Could not notify client %s with unknown transport " + CRM_XS " flags=%#llx", + pcmk__client_name(client), client->flags); + } + return ENOTCONN; +} + +/*! + * \internal + * \brief Clean up and exit immediately + * + * \param[in] data Ignored + * + * \return Doesn't return + * \note This can be used as a timer callback. + */ +static gboolean +lrmd_exit(gpointer data) +{ + crm_info("Terminating with %d clients", pcmk__ipc_client_count()); + if (stonith_api) { + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); + stonith_api->cmds->disconnect(stonith_api); + stonith_api_delete(stonith_api); + } + if (ipcs) { + mainloop_del_ipc_server(ipcs); + } + +#ifdef PCMK__COMPILE_REMOTE + execd_stop_tls_server(); + ipc_proxy_cleanup(); +#endif + + pcmk__client_cleanup(); + g_hash_table_destroy(rsc_list); + + if (mainloop) { + lrmd_drain_alerts(mainloop); + } + + crm_exit(CRM_EX_OK); + return FALSE; +} + +/*! + * \internal + * \brief Request cluster shutdown if appropriate, otherwise exit immediately + * + * \param[in] nsig Signal that caused invocation (ignored) + */ +static void +lrmd_shutdown(int nsig) +{ +#ifdef PCMK__COMPILE_REMOTE + pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); + + /* If there are active proxied IPC providers, then we may be running + * resources, so notify the cluster that we wish to shut down. + */ + if (ipc_proxy) { + if (shutting_down) { + crm_notice("Waiting for cluster to stop resources before exiting"); + return; + } + + crm_info("Sending shutdown request to cluster"); + if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { + crm_crit("Shutdown request failed, exiting immediately"); + + } else { + /* We requested a shutdown. Now, we need to wait for an + * acknowledgement from the proxy host (which ensures the proxy host + * supports shutdown requests), then wait for all proxy hosts to + * disconnect (which ensures that all resources have been stopped). + */ + shutting_down = TRUE; + + /* Stop accepting new proxy connections */ + execd_stop_tls_server(); + + /* Older controller versions will never acknowledge our request, so + * set a fairly short timeout to exit quickly in that case. If we + * get the ack, we'll defuse this timer. + */ + shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); + + /* Currently, we let the OS kill us if the clients don't disconnect + * in a reasonable time. We could instead set a long timer here + * (shorter than what the OS is likely to use) and exit immediately + * if it pops. + */ + return; + } + } +#endif + lrmd_exit(NULL); +} + +/*! + * \internal + * \brief Defuse short exit timer if shutting down + */ +void +handle_shutdown_ack(void) +{ +#ifdef PCMK__COMPILE_REMOTE + if (shutting_down) { + crm_info("Received shutdown ack"); + if (shutdown_ack_timer > 0) { + g_source_remove(shutdown_ack_timer); + shutdown_ack_timer = 0; + } + return; + } +#endif + crm_debug("Ignoring unexpected shutdown ack"); +} + +/*! 
+ * \internal + * \brief Make short exit timer fire immediately + */ +void +handle_shutdown_nack(void) +{ +#ifdef PCMK__COMPILE_REMOTE + if (shutting_down) { + crm_info("Received shutdown nack"); + if (shutdown_ack_timer > 0) { + g_source_remove(shutdown_ack_timer); + shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); + } + return; + } +#endif + crm_debug("Ignoring unexpected shutdown nack"); +} + +static GOptionEntry entries[] = { + { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, + &options.log_files, "Send logs to the additional named logfile", NULL }, + +#ifdef PCMK__COMPILE_REMOTE + + { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, + "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, +#endif // PCMK__COMPILE_REMOTE + + { NULL } +}; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) +{ + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv, char **envp) +{ + int rc = pcmk_rc_ok; + crm_exit_t exit_code = CRM_EX_OK; + + const char *option = NULL; + + pcmk__output_t *out = NULL; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); +#ifdef PCMK__COMPILE_REMOTE + gchar **processed_args = pcmk__cmdline_preproc(argv, "lp"); +#else + gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); +#endif // PCMK__COMPILE_REMOTE + GOptionContext *context = build_arg_context(args, &output_group); + +#ifdef PCMK__COMPILE_REMOTE + // If necessary, create PID 1 now before any file descriptors are opened + remoted_spawn_pidone(argc, argv, envp); +#endif + + crm_log_preinit(EXECD_NAME, argc, argv); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + // Open additional log files + if (options.log_files != NULL) { + for (gchar **fname = options.log_files; *fname != NULL; fname++) { + rc = pcmk__add_logfile(*fname); + + if (rc != pcmk_rc_ok) { + out->err(out, "Logging to %s is disabled: %s", + *fname, pcmk_rc_str(rc)); + } + } + } + + pcmk__cli_init_logging(EXECD_NAME, args->verbosity); + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + + option = pcmk__env_option(PCMK__ENV_LOGFACILITY); + if (!pcmk__str_eq(option, PCMK__VALUE_NONE, + pcmk__str_casei|pcmk__str_null_matches) + && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { + setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ + } + + option = pcmk__env_option(PCMK__ENV_LOGFILE); + if (!pcmk__str_eq(option, PCMK__VALUE_NONE, + pcmk__str_casei|pcmk__str_null_matches)) { + setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ + + if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { + setenv("HA_DEBUGLOG", 
option, 1); /* Used by the ocf_log/ha_debug OCF macro */ + } + } + +#ifdef PCMK__COMPILE_REMOTE + if (options.port != NULL) { + setenv("PCMK_remote_port", options.port, 1); + } +#endif // PCMK__COMPILE_REMOTE + + start_time = time(NULL); + + crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); + + /* The presence of this variable allegedly controls whether child + * processes like httpd will try to use Systemd's sd_notify + * API + */ + unsetenv("NOTIFY_SOCKET"); + + { + // Temporary directory for resource agent use (leave owned by root) + int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755); + + if (rc != pcmk_rc_ok) { + crm_warn("Could not create resource agent temporary directory " + CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc)); + } + } + + rsc_list = pcmk__strkey_table(NULL, free_rsc); + ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); + if (ipcs == NULL) { + crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); + exit_code = CRM_EX_FATAL; + goto done; + } + +#ifdef PCMK__COMPILE_REMOTE + if (lrmd_init_remote_tls_server() < 0) { + crm_err("Failed to create TLS listener: shutting down and staying down"); + exit_code = CRM_EX_FATAL; + goto done; + } + ipc_proxy_init(); +#endif + + mainloop_add_signal(SIGTERM, lrmd_shutdown); + mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); + crm_notice("OCF resource agent search path is %s", OCF_RA_PATH); + g_main_loop_run(mainloop); + + /* should never get here */ + lrmd_exit(NULL); + +done: + g_strfreev(options.log_files); +#ifdef PCMK__COMPILE_REMOTE + g_free(options.port); +#endif // PCMK__COMPILE_REMOTE + + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(exit_code); +} diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h new file mode 100644 index 0000000..9c1d173 --- /dev/null +++ b/daemons/execd/pacemaker-execd.h @@ -0,0 +1,110 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef PACEMAKER_EXECD__H +# define PACEMAKER_EXECD__H + +# include <glib.h> +# include <crm/common/ipc_internal.h> +# include <crm/lrmd.h> +# include <crm/stonith-ng.h> + +# ifdef HAVE_GNUTLS_GNUTLS_H +# include <gnutls/gnutls.h> +# endif + +extern GHashTable *rsc_list; +extern time_t start_time; + +typedef struct lrmd_rsc_s { + char *rsc_id; + char *class; + char *provider; + char *type; + + int call_opts; + + /* NEVER dereference this pointer; it simply + * exists as a switch to let us know + * when the currently active operation has completed */ + void *active; + + /* Operations in this list + * have not been executed yet. */ + GList *pending_ops; + /* Operations in this list are recurring operations + * that have been handed off from the pending ops list. */ + GList *recurring_ops; + + /* If this resource is a fence device, probes are handled internally by the + * executor, and this value indicates the result that should currently be + * returned for probes. It should be one of: + * PCMK_EXEC_DONE (to indicate "running"), + * PCMK_EXEC_NO_FENCE_DEVICE ("not running"), or + * PCMK_EXEC_NOT_CONNECTED ("unknown because fencer connection was lost").
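+ * + * (A successful "start" sets PCMK_EXEC_DONE here, a successful "stop" sets + * PCMK_EXEC_NO_FENCE_DEVICE, stonith_connection_failed() downgrades a + * "running" value to PCMK_EXEC_NOT_CONNECTED, and probes in + * execute_stonith_action() simply report whatever is stored here.)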
+ */ + pcmk__action_result_t fence_probe_result; + + crm_trigger_t *work; +} lrmd_rsc_t; + +# ifdef HAVE_GNUTLS_GNUTLS_H +// in remoted_tls.c +int lrmd_init_remote_tls_server(void); +void execd_stop_tls_server(void); +# endif + +int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply); + +int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg); + +void notify_of_new_client(pcmk__client_t *new_client); + +void process_lrmd_message(pcmk__client_t *client, uint32_t id, + xmlNode *request); + +void free_rsc(gpointer data); + +void handle_shutdown_ack(void); + +void handle_shutdown_nack(void); + +void lrmd_client_destroy(pcmk__client_t *client); + +void client_disconnect_cleanup(const char *client_id); + +/*! + * \brief Don't worry about freeing this connection. It is + * taken care of after mainloop exits by the main() function. + */ +stonith_t *get_stonith_connection(void); + +/*! + * \brief This is a callback that tells the lrmd + * the current stonith connection has gone away. This allows + * us to timeout any pending stonith commands + */ +void stonith_connection_failed(void); + +#ifdef PCMK__COMPILE_REMOTE +void ipc_proxy_init(void); +void ipc_proxy_cleanup(void); +void ipc_proxy_add_provider(pcmk__client_t *client); +void ipc_proxy_remove_provider(pcmk__client_t *client); +void ipc_proxy_forward_client(pcmk__client_t *client, xmlNode *xml); +pcmk__client_t *ipc_proxy_get_provider(void); +int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy); +void remoted_spawn_pidone(int argc, char **argv, char **envp); +#endif + +int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, + xmlNode *request); +void lrmd_drain_alerts(GMainLoop *mloop); + +#endif // PACEMAKER_EXECD__H diff --git a/daemons/execd/pacemaker-remoted.8.inc b/daemons/execd/pacemaker-remoted.8.inc new file mode 100644 index 0000000..bc86acc --- /dev/null +++ b/daemons/execd/pacemaker-remoted.8.inc @@ -0,0 +1,5 @@ +[synopsis] +pacemaker-remoted [options] + +/for Pacemaker Remote nodes/ +.SH OPTIONS diff --git a/daemons/execd/pacemaker_remote.in b/daemons/execd/pacemaker_remote.in new file mode 100644 index 0000000..2096c5f --- /dev/null +++ b/daemons/execd/pacemaker_remote.in @@ -0,0 +1,176 @@ +#!@BASH_PATH@ + +# Authors: +# Andrew Beekhof +# +# License: Revised BSD + +# chkconfig: - 99 01 +# description: Pacemaker Cluster Manager +# processname: pacemaker-remoted +# +### BEGIN INIT INFO +# Provides: pacemaker_remote +# Required-Start: $network $remote_fs +# Should-Start: $syslog +# Required-Stop: $network $remote_fs +# Default-Start: +# Default-Stop: +# Short-Description: Manage the executor for Pacemaker Remote nodes +# Description: Manage the executor for Pacemaker Remote nodes +### END INIT INFO + +desc="Pacemaker Remote Executor" +prog="pacemaker-remoted" + +# set secure PATH +PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@" + +checkrc() { + if [ $? = 0 ]; then + success + else + failure + fi +} + +success() +{ + echo -ne "[ OK ]\r" +} + +failure() +{ + echo -ne "[FAILED]\r" +} + +status() +{ + pid=$(pidof $1 2>/dev/null) + local rtrn=$? + if [ $rtrn -ne 0 ]; then + echo "$1 is stopped" + if [ -f "@localstatedir@/run/$prog.pid" ]; then + rtrn=1 + else + rtrn=3 + fi + else + echo "$1 (pid $pid) is running..." + fi + return $rtrn +} + +if [ -d @CONFIGDIR@ ]; then + [ -f @INITDIR@/functions ] && . @INITDIR@/functions +set -a + [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker + [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd +set +a +fi + +LOCK_DIR="." 
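+# The "." above is only a last-resort fallback; the checks below prefer the +# distribution's standard lock directory when one exists.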
+if [ -d "@localstatedir@/lock/subsys" ]; then + LOCK_DIR="@localstatedir@/lock/subsys" +elif [ -d "@localstatedir@/lock" ]; then + LOCK_DIR="@localstatedir@/lock" +fi +[ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker_remote" + +# Check if there is a valid watchdog-device configured in sbd config +if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then + # enhance for unavailable chkconfig - don't touch sbd for now + if chkconfig --list sbd_remote_helper 2>/dev/null | grep -q ":on"; then + SBD_SERVICE=sbd_remote_helper + fi +fi + +start() +{ + echo -n "Starting $desc: " + + # most recent distributions use tmpfs for $@localstatedir@/run + # to avoid to clean it up on every boot. + # they also assume that init scripts will create + # required subdirectories for proper operations + mkdir -p "@localstatedir@/run" + + if status $prog > /dev/null 2>&1; then + success + else + $prog > /dev/null 2>&1 & + + # Time to connect to corosync and fail + sleep 5 + + if status $prog > /dev/null 2>&1; then + touch "$LOCK_FILE" + pidof $prog > "@localstatedir@/run/$prog.pid" + success + else + failure + rtrn=1 + fi + fi + echo + + [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE start +} + +stop() +{ + if status $prog > /dev/null 2>&1; then + echo -n "Signaling $desc to terminate: " + kill -TERM $(pidof $prog) > /dev/null 2>&1 + success + echo + + echo -n "Waiting for $desc to unload:" + while status $prog > /dev/null 2>&1; do + sleep 1 + echo -n "." + done + else + echo -n "$desc is already stopped" + fi + + rm -f "$LOCK_FILE" + rm -f "@localstatedir@/run/$prog.pid" + success + echo + + [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE stop +} + +rtrn=0 + +case "$1" in +start) + start +;; +restart|reload|force-reload) + stop + start +;; +condrestart|try-restart) + if status $prog > /dev/null 2>&1; then + stop + start + rtrn=$? + fi +;; +status) + status $prog + rtrn=$? +;; +stop) + stop + rtrn=$? +;; +*) + echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" + rtrn=2 +;; +esac + +exit $rtrn diff --git a/daemons/execd/pacemaker_remote.service.in b/daemons/execd/pacemaker_remote.service.in new file mode 100644 index 0000000..1e48d14 --- /dev/null +++ b/daemons/execd/pacemaker_remote.service.in @@ -0,0 +1,52 @@ +[Unit] +Description=Pacemaker Remote executor daemon +Documentation=man:pacemaker-remoted +Documentation=https://clusterlabs.org/pacemaker/doc/ + +# See main pacemaker unit file for descriptions of why these are needed +After=network.target +After=time-sync.target +After=dbus.service +Wants=dbus.service +After=resource-agents-deps.target +Wants=resource-agents-deps.target +After=syslog.service +After=rsyslog.service + +[Install] +Alias=pacemaker-remote.service +WantedBy=multi-user.target + +[Service] +Type=simple +KillMode=process +NotifyAccess=none +EnvironmentFile=-@CONFIGDIR@/pacemaker +EnvironmentFile=-@CONFIGDIR@/sbd + +# Not actually success, but fatal failure -- this ensures no respawn +SuccessExitStatus=100 + +ExecStart=@sbindir@/pacemaker-remoted + +# Systemd v227 and above can limit the number of processes spawned by a +# service. That is a bad idea for an HA cluster resource manager, so disable it +# by default. The administrator can create a local override if they really want +# a limit. If your systemd version does not support TasksMax, and you want to +# get rid of the resulting log warnings, comment out this option. 
+TasksMax=infinity + +# If connected to the cluster and when the service functions properly, it will +# wait to exit until the cluster notifies it all resources on the remote node +# have been stopped. The default of 30min should cover most typical cluster +# configurations, but it may need an increase to adapt to local conditions +# (e.g. a large, clustered database could conceivably take longer to stop). +TimeoutStopSec=30min +TimeoutStartSec=30s + +# Restart options include: no, on-success, on-failure, on-abort or always +Restart=on-failure + +# crm_perror() writes directly to stderr, so ignore it here +# to avoid double-logging with the wrong format +StandardError=null diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c new file mode 100644 index 0000000..4f914eb --- /dev/null +++ b/daemons/execd/remoted_pidone.c @@ -0,0 +1,298 @@ +/* + * Copyright 2017-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pacemaker-execd.h" + +static pid_t main_pid = 0; + +static void +sigdone(void) +{ + exit(CRM_EX_OK); +} + +static void +sigreap(void) +{ + pid_t pid = 0; + int status; + + do { + /* + * Opinions seem to differ as to what to put here: + * -1, any child process + * 0, any child process whose process group ID is equal to that of the calling process + */ + pid = waitpid(-1, &status, WNOHANG); + if (pid == main_pid) { + /* Exit when pacemaker-remote exits and use the same return code */ + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } + exit(CRM_EX_ERROR); + } + } while (pid > 0); +} + +static struct { + int sig; + void (*handler)(void); +} sigmap[] = { + { SIGCHLD, sigreap }, + { SIGINT, sigdone }, +}; + +/*! + * \internal + * \brief Check a line of text for a valid environment variable name + * + * \param[in] line Text to check + * \param[out] first First character of valid name if found, NULL otherwise + * \param[out] last Last character of valid name if found, NULL otherwise + * + * \return TRUE if valid name found, FALSE otherwise + * \note It's reasonable to impose limitations on environment variable names + * beyond what C or setenv() does: We only allow names that contain only + * [a-zA-Z0-9_] characters and do not start with a digit. + */ +static bool +find_env_var_name(char *line, char **first, char **last) +{ + // Skip leading whitespace + *first = line; + while (isspace(**first)) { + ++*first; + } + + if (isalpha(**first) || (**first == '_')) { // Valid first character + *last = *first; + while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) { + ++*last; + } + return TRUE; + } + + *first = *last = NULL; + return FALSE; +} + +static void +load_env_vars(const char *filename) +{ + /* We haven't forked or initialized logging yet, so don't leave any file + * descriptors open, and don't log -- silently ignore errors. 
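+ * + * Each line is expected to look like NAME=value, NAME='value' or NAME="value", + * optionally followed by whitespace and/or a "#" comment; anything else on the + * line invalidates the assignment (see the parsing below).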
+ */ + FILE *fp = fopen(filename, "r"); + + if (fp != NULL) { + char line[LINE_MAX] = { '\0', }; + + while (fgets(line, LINE_MAX, fp) != NULL) { + char *name = NULL; + char *end = NULL; + char *value = NULL; + char *quote = NULL; + + // Look for valid name immediately followed by equals sign + if (find_env_var_name(line, &name, &end) && (*++end == '=')) { + + // Null-terminate name, and advance beyond equals sign + *end++ = '\0'; + + // Check whether value is quoted + if ((*end == '\'') || (*end == '"')) { + quote = end++; + } + value = end; + + if (quote) { + /* Value is remaining characters up to next non-backslashed + * matching quote character. + */ + while (((*end != *quote) || (*(end - 1) == '\\')) + && (*end != '\0')) { + end++; + } + if (*end == *quote) { + // Null-terminate value, and advance beyond close quote + *end++ = '\0'; + } else { + // Matching closing quote wasn't found + value = NULL; + } + + } else { + /* Value is remaining characters up to next non-backslashed + * whitespace. + */ + while ((!isspace(*end) || (*(end - 1) == '\\')) + && (*end != '\0')) { + ++end; + } + + if (end == (line + LINE_MAX - 1)) { + // Line was too long + value = NULL; + } + // Do NOT null-terminate value (yet) + } + + /* We have a valid name and value, and end is now the character + * after the closing quote or the first whitespace after the + * unquoted value. Make sure the rest of the line is just + * whitespace or a comment. + */ + if (value) { + char *value_end = end; + + while (isspace(*end) && (*end != '\n')) { + ++end; + } + if ((*end == '\n') || (*end == '#')) { + if (quote == NULL) { + // Now we can null-terminate an unquoted value + *value_end = '\0'; + } + + // Don't overwrite (bundle options take precedence) + setenv(name, value, 0); + + } else { + value = NULL; + } + } + } + + if ((value == NULL) && (strchr(line, '\n') == NULL)) { + // Eat remainder of line beyond LINE_MAX + if (fscanf(fp, "%*[^\n]\n") == EOF) { + value = NULL; // Don't care, make compiler happy + } + } + } + fclose(fp); + } +} + +void +remoted_spawn_pidone(int argc, char **argv, char **envp) +{ + sigset_t set; + + /* This environment variable exists for two purposes: + * - For testing, setting it to "full" enables full PID 1 behavior even + * when PID is not 1 + * - Setting to "vars" enables just the loading of environment variables + * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or + * containers with a custom PID 1 script that launches pacemaker-remoted. + */ + const char *pid1 = (getpid() == 1)? "full" : getenv("PCMK_remote_pid1"); + + if (pid1 == NULL) { + return; + } + + /* When a container is launched, it may be given specific environment + * variables, which for Pacemaker bundles are given in the bundle + * configuration. However, that does not allow for host-specific values. + * To allow for that, look for a special file containing a shell-like syntax + * of name/value pairs, and export those into the environment. + */ + load_env_vars("/etc/pacemaker/pcmk-init.env"); + + if (strcmp(pid1, "full")) { + return; + } + + /* Containers can be expected to have /var/log, but they may not have + * /var/log/pacemaker, so use a different default if no value has been + * explicitly configured in the container's environment. 
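+ * + * (A PCMK_logfile value inherited from the container environment, or loaded + * from pcmk-init.env above, takes precedence over this default.)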
+ */ + if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { + pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log"); + } + + sigfillset(&set); + sigprocmask(SIG_BLOCK, &set, 0); + + main_pid = fork(); + switch (main_pid) { + case 0: + sigprocmask(SIG_UNBLOCK, &set, NULL); + setsid(); + setpgid(0, 0); + + // Child remains as pacemaker-remoted + return; + case -1: + perror("fork"); + } + + /* Parent becomes the reaper of zombie processes */ + /* Safe to initialize logging now if needed */ + +# ifdef HAVE_PROGNAME + /* Differentiate ourselves in the 'ps' output */ + { + char *p; + int i, maxlen; + char *LastArgv = NULL; + const char *name = "pcmk-init"; + + for (i = 0; i < argc; i++) { + if (!i || (LastArgv + 1 == argv[i])) + LastArgv = argv[i] + strlen(argv[i]); + } + + for (i = 0; envp[i] != NULL; i++) { + if ((LastArgv + 1) == envp[i]) { + LastArgv = envp[i] + strlen(envp[i]); + } + } + + maxlen = (LastArgv - argv[0]) - 2; + + i = strlen(name); + + /* We can overwrite individual argv[] arguments */ + snprintf(argv[0], maxlen, "%s", name); + + /* Now zero out everything else */ + p = &argv[0][i]; + while (p < LastArgv) { + *p++ = '\0'; + } + argv[1] = NULL; + } +# endif // HAVE_PROGNAME + + while (1) { + int sig; + size_t i; + + sigwait(&set, &sig); + for (i = 0; i < PCMK__NELEM(sigmap); i++) { + if (sigmap[i].sig == sig) { + sigmap[i].handler(); + break; + } + } + } +} diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c new file mode 100644 index 0000000..62c8c3a --- /dev/null +++ b/daemons/execd/remoted_proxy.c @@ -0,0 +1,470 @@ +/* + * Copyright 2012-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include "pacemaker-execd.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static qb_ipcs_service_t *cib_ro = NULL; +static qb_ipcs_service_t *cib_rw = NULL; +static qb_ipcs_service_t *cib_shm = NULL; + +static qb_ipcs_service_t *attrd_ipcs = NULL; +static qb_ipcs_service_t *crmd_ipcs = NULL; +static qb_ipcs_service_t *stonith_ipcs = NULL; +static qb_ipcs_service_t *pacemakerd_ipcs = NULL; + +// An IPC provider is a cluster node controller connecting as a client +static GList *ipc_providers = NULL; +/* ipc clients == things like cibadmin, crm_resource, connecting locally */ +static GHashTable *ipc_clients = NULL; + +/*! + * \internal + * \brief Get an IPC proxy provider + * + * \return Pointer to a provider if one exists, NULL otherwise + * + * \note Grab the first provider, which is the most recent connection. That way, + * if we haven't yet timed out an old, failed connection, we don't try to + * use it. + */ +pcmk__client_t * +ipc_proxy_get_provider(void) +{ + return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL; +} + +/*! 
+ * \internal + * \brief Accept a client connection on a proxy IPC server + * + * \param[in] c Client's IPC connection + * \param[in] uid Client's user ID + * \param[in] gid Client's group ID + * \param[in] ipc_channel Name of IPC server to proxy + * + * \return pcmk_ok on success, -errno on error + */ +static int32_t +ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) +{ + pcmk__client_t *client; + pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); + xmlNode *msg; + + if (ipc_proxy == NULL) { + crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s " + "because not connected to cluster", uid, gid, ipc_channel); + return -EREMOTEIO; + } + + /* This new client is a local IPC client on a Pacemaker Remote controlled + * node, needing to access cluster node IPC services. + */ + client = pcmk__new_client(c, uid, gid); + if (client == NULL) { + return -EREMOTEIO; + } + + /* This ipc client is bound to a single ipc provider. If the + * provider goes away, this client is disconnected */ + client->userdata = strdup(ipc_proxy->id); + client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id); + + /* Allow remote executor to distinguish between proxied local clients and + * actual executor API clients + */ + pcmk__set_client_flags(client, pcmk__client_to_proxy); + + g_hash_table_insert(ipc_clients, client->id, client); + + msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW); + crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel); + crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); + lrmd_server_send_notify(ipc_proxy, msg); + free_xml(msg); + crm_debug("Accepted IPC proxy connection (session ID %s) " + "from uid %d gid %d on channel %s", + client->id, uid, gid, ipc_channel); + return 0; +} + +static int32_t +crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); +} + +static int32_t +attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return ipc_proxy_accept(c, uid, gid, T_ATTRD); +} + +static int32_t +stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return ipc_proxy_accept(c, uid, gid, "stonith-ng"); +} + +static int32_t +pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return -EREMOTEIO; +} + +static int32_t +cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW); +} + +static int32_t +cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO); +} + +void +ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml) +{ + const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION); + const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP); + xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG); + pcmk__client_t *ipc_client; + int rc = pcmk_rc_ok; + + /* If the IPC provider is acknowledging our shutdown request, + * defuse the short exit timer to give the cluster time to + * stop any resources we're running. 
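+ * (handle_shutdown_ack() and its nack counterpart are implemented in + * pacemaker-execd.c; see the declarations in pacemaker-execd.h.)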
+ */ + if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK, pcmk__str_casei)) { + handle_shutdown_ack(); + return; + } + + if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK, pcmk__str_casei)) { + handle_shutdown_nack(); + return; + } + + ipc_client = pcmk__find_client_by_id(session); + if (ipc_client == NULL) { + xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); + crm_xml_add(msg, F_LRMD_IPC_SESSION, session); + lrmd_server_send_notify(ipc_proxy, msg); + free_xml(msg); + return; + } + + /* This is an event or response from the ipc provider + * going to the local ipc client. + * + * Looking at the chain of events. + * + * -----remote node----------------|---- cluster node ------ + * ipc_client <--1--> this code + * <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event() + * <--3--> ipc server + * + * This function is receiving a msg from connection 2 + * and forwarding it to connection 1. + */ + + if (pcmk__str_eq(msg_type, LRMD_IPC_OP_EVENT, pcmk__str_casei)) { + crm_trace("Sending event to %s", ipc_client->id); + rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event); + + } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_RESPONSE, pcmk__str_casei)) { + int msg_id = 0; + + crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); + crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); + rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE); + + CRM_LOG_ASSERT(msg_id == ipc_client->request_id); + ipc_client->request_id = 0; + + } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) { + qb_ipcs_disconnect(ipc_client->ipcs); + + } else { + crm_err("Unknown ipc proxy msg type %s" , msg_type); + } + + if (rc != pcmk_rc_ok) { + crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d", + ipc_client->id, pcmk_rc_str(rc), rc); + } +} + +static int32_t +ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *client = pcmk__find_client(c); + pcmk__client_t *ipc_proxy = pcmk__find_client_by_id(client->userdata); + xmlNode *request = NULL; + xmlNode *msg = NULL; + + if (!ipc_proxy) { + qb_ipcs_disconnect(client->ipcs); + return 0; + } + + /* This is a request from the local ipc client going + * to the ipc provider. + * + * Looking at the chain of events. + * + * -----remote node----------------|---- cluster node ------ + * ipc_client <--1--> this code + * <--2--> pacemaker-controld:remote_proxy_dispatch_internal() + * <--3--> ipc server + * + * This function is receiving a request from connection + * 1 and forwarding it to connection 2. + */ + request = pcmk__client_data2xml(client, data, &id, &flags); + + if (!request) { + return 0; + } + + CRM_CHECK(client != NULL, crm_err("Invalid client"); + free_xml(request); return FALSE); + CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); + free_xml(request); return FALSE); + + /* This ensures that synced request/responses happen over the event channel + * in the controller, allowing the controller to process the messages async. 
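+ * + * The matching response returns through the provider as an + * LRMD_IPC_OP_RESPONSE message and is relayed back to the local client by + * ipc_proxy_forward_client() above.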
+ */ + pcmk__set_ipc_flags(flags, pcmk__client_name(client), crm_ipc_proxied); + client->request_id = id; + + msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST); + crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); + crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client)); + crm_xml_add(msg, F_LRMD_IPC_USER, client->user); + crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id); + crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); + add_message_xml(msg, F_LRMD_IPC_MSG, request); + lrmd_server_send_notify(ipc_proxy, msg); + free_xml(request); + free_xml(msg); + + return 0; +} + +/*! + * \internal + * \brief Notify a proxy provider that we wish to shut down + * + * \param[in,out] ipc_proxy IPC client connection to proxy provider + * + * \return 0 on success, -1 on error + */ +int +ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy) +{ + xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + int rc; + + crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ); + + /* We don't really have a session, but the controller needs this attribute + * to recognize this as proxy communication. + */ + crm_xml_add(msg, F_LRMD_IPC_SESSION, "0"); + + rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0; + free_xml(msg); + return rc; +} + +static int32_t +ipc_proxy_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + pcmk__client_t *ipc_proxy; + + if (client == NULL) { + return 0; + } + + ipc_proxy = pcmk__find_client_by_id(client->userdata); + + crm_trace("Connection %p", c); + + if (ipc_proxy) { + xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); + crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); + crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); + lrmd_server_send_notify(ipc_proxy, msg); + free_xml(msg); + } + + g_hash_table_remove(ipc_clients, client->id); + + free(client->userdata); + client->userdata = NULL; + pcmk__free_client(client); + return 0; +} + +static void +ipc_proxy_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p", c); + ipc_proxy_closed(c); +} + +static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { + .connection_accept = crmd_proxy_accept, + .connection_created = NULL, + .msg_process = ipc_proxy_dispatch, + .connection_closed = ipc_proxy_closed, + .connection_destroyed = ipc_proxy_destroy +}; + +static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { + .connection_accept = attrd_proxy_accept, + .connection_created = NULL, + .msg_process = ipc_proxy_dispatch, + .connection_closed = ipc_proxy_closed, + .connection_destroyed = ipc_proxy_destroy +}; + +static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { + .connection_accept = stonith_proxy_accept, + .connection_created = NULL, + .msg_process = ipc_proxy_dispatch, + .connection_closed = ipc_proxy_closed, + .connection_destroyed = ipc_proxy_destroy +}; + +static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = { + .connection_accept = pacemakerd_proxy_accept, + .connection_created = NULL, + .msg_process = NULL, + .connection_closed = NULL, + .connection_destroyed = NULL +}; + +static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { + .connection_accept = cib_proxy_accept_ro, + .connection_created = NULL, + .msg_process = ipc_proxy_dispatch, + .connection_closed = ipc_proxy_closed, + .connection_destroyed = ipc_proxy_destroy +}; + +static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { + .connection_accept = cib_proxy_accept_rw, + .connection_created = 
NULL, + .msg_process = ipc_proxy_dispatch, + .connection_closed = ipc_proxy_closed, + .connection_destroyed = ipc_proxy_destroy +}; + +void +ipc_proxy_add_provider(pcmk__client_t *ipc_proxy) +{ + // Prepending ensures the most recent connection is always first + ipc_providers = g_list_prepend(ipc_providers, ipc_proxy); +} + +void +ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy) +{ + GHashTableIter iter; + pcmk__client_t *ipc_client = NULL; + char *key = NULL; + GList *remove_these = NULL; + GList *gIter = NULL; + + ipc_providers = g_list_remove(ipc_providers, ipc_proxy); + + g_hash_table_iter_init(&iter, ipc_clients); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { + const char *proxy_id = ipc_client->userdata; + if (pcmk__str_eq(proxy_id, ipc_proxy->id, pcmk__str_casei)) { + crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", + ipc_client->id, ipc_client->pid); + /* we can't remove during the iteration, so copy items + * to a list we can destroy later */ + remove_these = g_list_append(remove_these, ipc_client); + } + } + + for (gIter = remove_these; gIter != NULL; gIter = gIter->next) { + ipc_client = gIter->data; + + // Disconnection callback will free the client here + qb_ipcs_disconnect(ipc_client->ipcs); + } + + /* just frees the list, not the elements in the list */ + g_list_free(remove_these); +} + +void +ipc_proxy_init(void) +{ + ipc_clients = pcmk__strkey_table(NULL, NULL); + + pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro, + &cib_proxy_callbacks_rw); + pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks); + pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks); + pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks); + crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks); + if (crmd_ipcs == NULL) { + crm_err("Failed to create controller: exiting and inhibiting respawn"); + crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); + crm_exit(CRM_EX_FATAL); + } +} + +void +ipc_proxy_cleanup(void) +{ + if (ipc_providers) { + g_list_free(ipc_providers); + ipc_providers = NULL; + } + if (ipc_clients) { + g_hash_table_destroy(ipc_clients); + ipc_clients = NULL; + } + pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm); + qb_ipcs_destroy(attrd_ipcs); + qb_ipcs_destroy(stonith_ipcs); + qb_ipcs_destroy(pacemakerd_ipcs); + qb_ipcs_destroy(crmd_ipcs); + cib_ro = NULL; + cib_rw = NULL; + cib_shm = NULL; +} diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c new file mode 100644 index 0000000..c65e3f3 --- /dev/null +++ b/daemons/execd/remoted_tls.c @@ -0,0 +1,428 @@ +/* + * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pacemaker-execd.h" + +#ifdef HAVE_GNUTLS_GNUTLS_H + +# include + +# define LRMD_REMOTE_AUTH_TIMEOUT 10000 +gnutls_psk_server_credentials_t psk_cred_s; +gnutls_dh_params_t dh_params; +static int ssock = -1; +extern int lrmd_call_id; + +static void +debug_log(int level, const char *str) +{ + fputs(str, stderr); +} + +/*! 
+ * \internal + * \brief Read (more) TLS handshake data from client + * + * \param[in,out] client IPC client doing handshake + * + * \return 0 on success or more data needed, -1 on error + */ +static int +remoted__read_handshake_data(pcmk__client_t *client) +{ + int rc = pcmk__read_handshake_data(client); + + if (rc == EAGAIN) { + /* No more data is available at the moment. Just return for now; + * we'll get invoked again once the client sends more. + */ + return 0; + } else if (rc != pcmk_rc_ok) { + return -1; + } + + if (client->remote->auth_timeout) { + g_source_remove(client->remote->auth_timeout); + } + client->remote->auth_timeout = 0; + + pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); + crm_notice("Remote client connection accepted"); + + /* Only a client with access to the TLS key can connect, so we can treat + * it as privileged. + */ + pcmk__set_client_flags(client, pcmk__client_privileged); + + // Alert other clients of the new connection + notify_of_new_client(client); + return 0; +} + +static int +lrmd_remote_client_msg(gpointer data) +{ + int id = 0; + int rc; + xmlNode *request = NULL; + pcmk__client_t *client = data; + + if (!pcmk_is_set(client->flags, + pcmk__client_tls_handshake_complete)) { + return remoted__read_handshake_data(client); + } + + switch (pcmk__remote_ready(client->remote, 0)) { + case pcmk_rc_ok: + break; + case ETIME: // No message available to read + return 0; + default: // Error + crm_info("Remote client disconnected while polling it"); + return -1; + } + + rc = pcmk__read_remote_message(client->remote, -1); + + request = pcmk__remote_message_xml(client->remote); + while (request) { + crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id); + crm_trace("Processing remote client request %d", id); + if (!client->name) { + const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); + + if (value) { + client->name = strdup(value); + } + } + + lrmd_call_id++; + if (lrmd_call_id < 1) { + lrmd_call_id = 1; + } + + crm_xml_add(request, F_LRMD_CLIENTID, client->id); + crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); + crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); + + process_lrmd_message(client, id, request); + free_xml(request); + + /* process all the messages in the current buffer */ + request = pcmk__remote_message_xml(client->remote); + } + + if (rc == ENOTCONN) { + crm_info("Remote client disconnected while reading from it"); + return -1; + } + + return 0; +} + +static void +lrmd_remote_client_destroy(gpointer user_data) +{ + pcmk__client_t *client = user_data; + + if (client == NULL) { + return; + } + + crm_notice("Cleaning up after remote client %s disconnected", + pcmk__client_name(client)); + + ipc_proxy_remove_provider(client); + + /* if this is the last remote connection, stop recurring + * operations */ + if (pcmk__ipc_client_count() == 1) { + client_disconnect_cleanup(NULL); + } + + if (client->remote->tls_session) { + void *sock_ptr; + int csock; + + sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); + csock = GPOINTER_TO_INT(sock_ptr); + + gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR); + gnutls_deinit(*client->remote->tls_session); + gnutls_free(client->remote->tls_session); + close(csock); + } + + lrmd_client_destroy(client); + return; +} + +static gboolean +lrmd_auth_timeout_cb(gpointer data) +{ + pcmk__client_t *client = data; + + client->remote->auth_timeout = 0; + + if (pcmk_is_set(client->flags, + pcmk__client_tls_handshake_complete)) { + return FALSE; + } + + 
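+ /* Dropping the fd source disconnects the client; the source's destroy + * callback (lrmd_remote_client_destroy) then releases the TLS session and + * the client itself. + */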
mainloop_del_fd(client->remote->source); + client->remote->source = NULL; + crm_err("Remote client authentication timed out"); + + return FALSE; +} + +// Dispatch callback for remote server socket +static int +lrmd_remote_listen(gpointer data) +{ + int csock = -1; + gnutls_session_t *session = NULL; + pcmk__client_t *new_client = NULL; + + // For client socket + static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { + .dispatch = lrmd_remote_client_msg, + .destroy = lrmd_remote_client_destroy, + }; + + CRM_CHECK(ssock >= 0, return TRUE); + + if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) { + return TRUE; + } + + session = pcmk__new_tls_session(csock, GNUTLS_SERVER, GNUTLS_CRD_PSK, + psk_cred_s); + if (session == NULL) { + close(csock); + return TRUE; + } + + new_client = pcmk__new_unauth_client(NULL); + new_client->remote = calloc(1, sizeof(pcmk__remote_t)); + pcmk__set_client_flags(new_client, pcmk__client_tls); + new_client->remote->tls_session = session; + + // Require the client to authenticate within this time + new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT, + lrmd_auth_timeout_cb, + new_client); + crm_info("Remote client pending authentication " + CRM_XS " %p id: %s", new_client, new_client->id); + + new_client->remote->source = + mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock, + new_client, &lrmd_remote_fd_cb); + return TRUE; +} + +static void +tls_server_dropped(gpointer user_data) +{ + crm_notice("TLS server session ended"); + return; +} + +// \return 0 on success, -1 on error (gnutls_psk_server_credentials_function) +static int +lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) +{ + return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1; +} + +static int +bind_and_listen(struct addrinfo *addr) +{ + int optval; + int fd; + int rc; + char buffer[INET6_ADDRSTRLEN] = { 0, }; + + pcmk__sockaddr2str(addr->ai_addr, buffer); + crm_trace("Attempting to bind to address %s", buffer); + + fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); + if (fd < 0) { + crm_perror(LOG_ERR, "Listener socket creation failed"); + return -1; + } + + /* reuse address */ + optval = 1; + rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); + if (rc < 0) { + crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer); + close(fd); + return -1; + } + + if (addr->ai_family == AF_INET6) { + optval = 0; + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); + if (rc < 0) { + crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer); + close(fd); + return -1; + } + } + + if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { + crm_perror(LOG_ERR, "Cannot bind to %s", buffer); + close(fd); + return -1; + } + + if (listen(fd, 10) == -1) { + crm_perror(LOG_ERR, "Cannot listen on %s", buffer); + close(fd); + return -1; + } + return fd; +} + +static int +get_address_info(const char *bind_name, int port, struct addrinfo **res) +{ + int rc; + char port_str[6]; // at most "65535" + struct addrinfo hints; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_flags = AI_PASSIVE; + hints.ai_family = AF_UNSPEC; // IPv6 or IPv4 + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + snprintf(port_str, sizeof(port_str), "%d", port); + rc = getaddrinfo(bind_name, port_str, &hints, res); + if (rc) { + crm_err("Unable to get IP address(es) for %s: %s", + (bind_name? 
bind_name : "local node"), gai_strerror(rc)); + return -EADDRNOTAVAIL; + } + return pcmk_ok; +} + +int +lrmd_init_remote_tls_server(void) +{ + int filter; + int port = crm_default_remote_port(); + struct addrinfo *res = NULL, *iter; + gnutls_datum_t psk_key = { NULL, 0 }; + const char *bind_name = getenv("PCMK_remote_address"); + + static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { + .dispatch = lrmd_remote_listen, + .destroy = tls_server_dropped, + }; + + CRM_CHECK(ssock == -1, return ssock); + + crm_debug("Starting TLS listener on %s port %d", + (bind_name? bind_name : "all addresses on"), port); + crm_gnutls_global_init(); + gnutls_global_set_log_function(debug_log); + + if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) { + return -1; + } + gnutls_psk_allocate_server_credentials(&psk_cred_s); + gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb); + gnutls_psk_set_server_dh_params(psk_cred_s, dh_params); + + /* The key callback won't get called until the first client connection + * attempt. Do it once here, so we can warn the user at start-up if we can't + * read the key. We don't error out, though, because it's fine if the key is + * going to be added later. + */ + if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) { + crm_warn("A cluster connection will not be possible until the key is available"); + } + gnutls_free(psk_key.data); + + if (get_address_info(bind_name, port, &res) != pcmk_ok) { + return -1; + } + + /* Currently we listen on only one address from the resulting list (the + * first IPv6 address we can bind to if possible, otherwise the first IPv4 + * address we can bind to). When bind_name is NULL, this should be the + * respective wildcard address. + * + * @TODO If there is demand for specifying more than one address, allow + * bind_name to be a space-separated list, call getaddrinfo() for each, + * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets + * since IPv4 listeners will have their own sockets). + */ + iter = res; + filter = AF_INET6; + while (iter) { + if (iter->ai_family == filter) { + ssock = bind_and_listen(iter); + } + if (ssock != -1) { + break; + } + + iter = iter->ai_next; + if (iter == NULL && filter == AF_INET6) { + iter = res; + filter = AF_INET; + } + } + + if (ssock >= 0) { + mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock, + NULL, &remote_listen_fd_callbacks); + crm_debug("Started TLS listener on %s port %d", + (bind_name? bind_name : "all addresses on"), port); + } + freeaddrinfo(res); + return ssock; +} + +void +execd_stop_tls_server(void) +{ + if (psk_cred_s) { + gnutls_psk_free_server_credentials(psk_cred_s); + psk_cred_s = 0; + } + + if (ssock >= 0) { + close(ssock); + ssock = -1; + } +} +#endif diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am new file mode 100644 index 0000000..2ca0088 --- /dev/null +++ b/daemons/fenced/Makefile.am @@ -0,0 +1,52 @@ +# +# Original Author: Sun Jiang Dong +# Copyright 2004 International Business Machines +# +# with later changes copyright 2004-2023 the Pacemaker project contributors. +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
+# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +halibdir = $(CRM_DAEMON_DIR) + +halib_PROGRAMS = pacemaker-fenced cts-fence-helper + +noinst_HEADERS = pacemaker-fenced.h + +if BUILD_XML_HELP +man7_MANS = pacemaker-fenced.7 +endif + +cts_fence_helper_SOURCES = cts-fence-helper.c +cts_fence_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/fencing/libstonithd.la + +pacemaker_fenced_YFLAGS = -d +pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE) +pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/fencing/libstonithd.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/lib/pacemaker/libpacemaker.la \ + $(CLUSTERLIBS) +pacemaker_fenced_SOURCES = pacemaker-fenced.c \ + fenced_commands.c \ + fenced_remote.c \ + fenced_history.c + +CLEANFILES = $(man7_MANS) $(man8_MANS) + +if BUILD_LEGACY_LINKS +install-exec-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd +endif diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c new file mode 100644 index 0000000..e18a1f4 --- /dev/null +++ b/daemons/fenced/cts-fence-helper.c @@ -0,0 +1,681 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events" + +static GMainLoop *mainloop = NULL; +static crm_trigger_t *trig = NULL; +static int mainloop_iter = 0; +static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + +typedef void (*mainloop_test_iteration_cb) (int check_event); + +#define MAINLOOP_DEFAULT_TIMEOUT 2 + +enum test_modes { + test_standard = 0, // test using a specific developer environment + test_passive, // watch notifications only + test_api_sanity, // sanity-test stonith client API using fence_dummy + test_api_mainloop, // sanity-test mainloop code with async responses +}; + +struct { + enum test_modes mode; +} options = { + .mode = test_standard +}; + +static gboolean +mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { + if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) { + options.mode = test_api_mainloop; + } else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) { + options.mode = test_api_sanity; + } else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) { + options.mode = test_passive; + } + + return TRUE; +} + +static GOptionEntry entries[] = { + { "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, + NULL, NULL, + }, + + { "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, + NULL, NULL, + }, + + { "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, + NULL, NULL, + }, + + { NULL } +}; + +static stonith_t *st = NULL; +static struct pollfd pollfd; +static const int st_opts = st_opt_sync_call; +static int expected_notifications = 0; +static int verbose = 
0; + +static void +mainloop_test_done(const char *origin, bool pass) +{ + if (pass) { + crm_info("SUCCESS - %s", origin); + mainloop_iter++; + mainloop_set_trigger(trig); + result.execution_status = PCMK_EXEC_DONE; + result.exit_status = CRM_EX_OK; + } else { + crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status, + pcmk_exec_status_str(result.execution_status)); + crm_exit(CRM_EX_ERROR); + } +} + + +static void +dispatch_helper(int timeout) +{ + int rc; + + crm_debug("Looking for notification"); + pollfd.events = POLLIN; + while (true) { + rc = poll(&pollfd, 1, timeout); /* wait 10 minutes, -1 forever */ + if (rc > 0) { + if (!stonith_dispatch(st)) { + break; + } + } else { + break; + } + } +} + +static void +st_callback(stonith_t * st, stonith_event_t * e) +{ + char *desc = NULL; + + if (st->state == stonith_disconnected) { + crm_exit(CRM_EX_DISCONNECT); + } + + desc = stonith__event_description(e); + crm_notice("%s", desc); + free(desc); + + if (expected_notifications) { + expected_notifications--; + } +} + +static void +st_global_callback(stonith_t * stonith, stonith_callback_data_t * data) +{ + crm_notice("Call %d exited %d: %s (%s)", + data->call_id, stonith__exit_status(data), + stonith__execution_status(data), + pcmk__s(stonith__exit_reason(data), "unspecified reason")); +} + +static void +passive_test(void) +{ + int rc = 0; + + rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); + if (rc != pcmk_ok) { + stonith_api_delete(st); + crm_exit(CRM_EX_DISCONNECT); + } + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); + st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback); + st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); + st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); + st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", + st_global_callback); + + dispatch_helper(600 * 1000); +} + +#define single_test(cmd, str, num_notifications, expected_rc) \ +{ \ + int rc = 0; \ + rc = cmd; \ + expected_notifications = 0; \ + if (num_notifications) { \ + expected_notifications = num_notifications; \ + dispatch_helper(500); \ + } \ + if (rc != expected_rc) { \ + crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \ + crm_exit(CRM_EX_ERROR); \ + } else if (expected_notifications) { \ + crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \ + num_notifications, num_notifications - expected_notifications, str); \ + crm_exit(CRM_EX_ERROR); \ + } else { \ + if (verbose) { \ + crm_info("SUCCESS - %s: %d", str, rc); \ + } else { \ + crm_debug("SUCCESS - %s: %d", str, rc); \ + } \ + } \ +}\ + +static void +run_fence_failure_test(void) +{ + stonith_key_value_t *params = NULL; + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2 false_1_node2=3,4"); + params = stonith_key_value_add(params, "mode", "fail"); + + single_test(st-> + cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), + "Register device1 for failure test", 1, 0); + + single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + "Fence failure results off", 1, -ENODATA); + + single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0), + "Fence failure results reboot", 1, -ENODATA); + + single_test(st->cmds->remove_device(st, st_opts, "test-id1"), + "Remove device1 for failure test", 1, 0); + + stonith_key_value_freeall(params, 1, 1); +} + 
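+/* Illustrative sketch only, not part of the upstream patch: the single_test() + * macro above couples one synchronous API call with an expected return code + * and an expected number of fencer notifications. A minimal extra check built + * on the same contract -- the "test-mon" device ID and its host map are + * hypothetical -- could look like this: + */ +static void +run_monitor_only_test(void) +{ + stonith_key_value_t *params = NULL; + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2"); + params = stonith_key_value_add(params, "mode", "pass"); + + /* Registration is expected to succeed (rc 0) with one notification */ + single_test(st->cmds->register_device(st, st_opts, "test-mon", "stonith-ng", + "fence_dummy", params), + "Register device for monitor-only test", 1, 0); + + /* Monitoring a "pass"-mode fence_dummy device should also succeed */ + single_test(st->cmds->monitor(st, st_opts, "test-mon", 1), + "Monitor-only check", 1, 0); + + single_test(st->cmds->remove_device(st, st_opts, "test-mon"), + "Remove monitor-only device", 1, 0); + + stonith_key_value_freeall(params, 1, 1); +} +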
+static void +run_fence_failure_rollover_test(void) +{ + stonith_key_value_t *params = NULL; + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2 false_1_node2=3,4"); + params = stonith_key_value_add(params, "mode", "fail"); + + single_test(st-> + cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), + "Register device1 for rollover test", 1, 0); + stonith_key_value_freeall(params, 1, 1); + params = NULL; + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2 false_1_node2=3,4"); + params = stonith_key_value_add(params, "mode", "pass"); + + single_test(st-> + cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), + "Register device2 for rollover test", 1, 0); + + single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + "Fence rollover results off", 1, 0); + + /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ + single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0), + "Fence rollover results on", 1, -ENODEV); + + single_test(st->cmds->remove_device(st, st_opts, "test-id1"), + "Remove device1 for rollover tests", 1, 0); + + single_test(st->cmds->remove_device(st, st_opts, "test-id2"), + "Remove device2 for rollover tests", 1, 0); + + stonith_key_value_freeall(params, 1, 1); +} + +static void +run_standard_test(void) +{ + stonith_key_value_t *params = NULL; + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2 false_1_node2=3,4"); + params = stonith_key_value_add(params, "mode", "pass"); + params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2"); + + single_test(st-> + cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params), + "Register", 1, 0); + stonith_key_value_freeall(params, 1, 1); + params = NULL; + + single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0); + + single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0); + + single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1), + "Status false_1_node2", 1, 0); + + single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), + "Status false_1_node1", 1, 0); + + single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), + "Fence unknown-host (expected failure)", 0, -ENODEV); + + single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), + "Fence false_1_node1", 1, 0); + + /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ + single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0), + "Unfence false_1_node1", 1, -ENODEV); + + /* Confirm that an invalid level index is rejected */ + single_test(st->cmds->register_level(st, st_opts, "node1", 999, params), + "Attempt to register an invalid level index", 0, -EINVAL); + + single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0); + + stonith_key_value_freeall(params, 1, 1); +} + +static void +sanity_tests(void) +{ + int rc = 0; + + rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); + if (rc != pcmk_ok) { + stonith_api_delete(st); + crm_exit(CRM_EX_DISCONNECT); + } + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); + st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback); + st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); + st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, 
st_callback); + st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", + st_global_callback); + + crm_info("Starting API Sanity Tests"); + run_standard_test(); + run_fence_failure_test(); + run_fence_failure_rollover_test(); + crm_info("Sanity Tests Passed"); +} + +static void +standard_dev_test(void) +{ + int rc = 0; + char *tmp = NULL; + stonith_key_value_t *params = NULL; + + rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); + if (rc != pcmk_ok) { + stonith_api_delete(st); + crm_exit(CRM_EX_DISCONNECT); + } + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "some-host=pcmk-7 true_1_node1=3,4"); + + rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params); + crm_debug("Register: %d", rc); + + rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10); + crm_debug("List: %d output: %s", rc, tmp ? tmp : ""); + + rc = st->cmds->monitor(st, st_opts, "test-id", 10); + crm_debug("Monitor: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10); + crm_debug("Status false_1_node2: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); + crm_debug("Status false_1_node1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0); + crm_debug("Fence unknown-host: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); + crm_debug("Status false_1_node1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0); + crm_debug("Fence false_1_node1: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); + crm_debug("Status false_1_node1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + crm_debug("Unfence false_1_node1: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); + crm_debug("Status false_1_node1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0); + crm_debug("Fence alias: %d", rc); + + rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); + crm_debug("Status alias: %d", rc); + + rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + crm_debug("Unfence false_1_node1: %d", rc); + + rc = st->cmds->remove_device(st, st_opts, "test-id"); + crm_debug("Remove test-id: %d", rc); + + stonith_key_value_freeall(params, 1, 1); +} + +static void + iterate_mainloop_tests(gboolean event_ready); + +static void +mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data) +{ + pcmk__set_result(&result, stonith__exit_status(data), + stonith__execution_status(data), + stonith__exit_reason(data)); + iterate_mainloop_tests(TRUE); +} + +static int +register_callback_helper(int callid) +{ + return st->cmds->register_callback(st, + callid, + MAINLOOP_DEFAULT_TIMEOUT, + st_opt_timeout_updates, NULL, "callback", mainloop_callback); +} + +static void +test_async_fence_pass(int check_event) +{ + int rc = 0; + + if (check_event) { + mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); + return; + } + + rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + if (rc < 0) { + crm_err("fence failed with rc %d", rc); + mainloop_test_done(__func__, false); + } + register_callback_helper(rc); + /* wait for event */ +} + +#define CUSTOM_TIMEOUT_ADDITION 10 +static void +test_async_fence_custom_timeout(int check_event) +{ + int rc = 0; + static time_t begin = 0; + + if (check_event) { + uint32_t diff = (time(NULL) - begin); + + if 
(result.execution_status != PCMK_EXEC_TIMEOUT) { + mainloop_test_done(__func__, false); + } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) { + crm_err + ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d", + CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff); + mainloop_test_done(__func__, false); + } else { + mainloop_test_done(__func__, true); + } + return; + } + begin = time(NULL); + + rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + if (rc < 0) { + crm_err("fence failed with rc %d", rc); + mainloop_test_done(__func__, false); + } + register_callback_helper(rc); + /* wait for event */ +} + +static void +test_async_fence_timeout(int check_event) +{ + int rc = 0; + + if (check_event) { + mainloop_test_done(__func__, + (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE)); + return; + } + + rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + if (rc < 0) { + crm_err("fence failed with rc %d", rc); + mainloop_test_done(__func__, false); + } + register_callback_helper(rc); + /* wait for event */ +} + +static void +test_async_monitor(int check_event) +{ + int rc = 0; + + if (check_event) { + mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); + return; + } + + rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT); + if (rc < 0) { + crm_err("monitor failed with rc %d", rc); + mainloop_test_done(__func__, false); + } + + register_callback_helper(rc); + /* wait for event */ +} + +static void +test_register_async_devices(int check_event) +{ + char buf[16] = { 0, }; + stonith_key_value_t *params = NULL; + + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "false_1_node1=1,2"); + params = stonith_key_value_add(params, "mode", "fail"); + st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params); + stonith_key_value_freeall(params, 1, 1); + + params = NULL; + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "true_1_node1=1,2"); + params = stonith_key_value_add(params, "mode", "pass"); + st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params); + stonith_key_value_freeall(params, 1, 1); + + params = NULL; + params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, + "custom_timeout_node1=1,2"); + params = stonith_key_value_add(params, "mode", "fail"); + params = stonith_key_value_add(params, "delay", "1000"); + snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION); + params = stonith_key_value_add(params, "pcmk_off_timeout", buf); + st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy", + params); + stonith_key_value_freeall(params, 1, 1); + + mainloop_test_done(__func__, true); +} + +static void +try_mainloop_connect(int check_event) +{ + int rc = stonith_api_connect_retry(st, crm_system_name, 10); + + if (rc == pcmk_ok) { + mainloop_test_done(__func__, true); + return; + } + crm_err("API CONNECTION FAILURE"); + mainloop_test_done(__func__, false); +} + +static void +iterate_mainloop_tests(gboolean event_ready) +{ + static mainloop_test_iteration_cb callbacks[] = { + try_mainloop_connect, + test_register_async_devices, + test_async_monitor, + test_async_fence_pass, + test_async_fence_timeout, + test_async_fence_custom_timeout, + }; + + if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { + /* all tests ran, everything passed */ + 
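+ /* (Any single failure already exited via mainloop_test_done(..., false), + * which calls crm_exit(), so reaching this point means every callback + * reported success.) */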
crm_info("ALL MAINLOOP TESTS PASSED!"); + crm_exit(CRM_EX_OK); + } + + callbacks[mainloop_iter] (event_ready); +} + +static gboolean +trigger_iterate_mainloop_tests(gpointer user_data) +{ + iterate_mainloop_tests(FALSE); + return TRUE; +} + +static void +test_shutdown(int nsig) +{ + int rc = 0; + + if (st) { + rc = st->cmds->disconnect(st); + crm_info("Disconnect: %d", rc); + + crm_debug("Destroy"); + stonith_api_delete(st); + } + + if (rc) { + crm_exit(CRM_EX_ERROR); + } +} + +static void +mainloop_tests(void) +{ + trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL); + mainloop_set_trigger(trig); + mainloop_add_signal(SIGTERM, test_shutdown); + + crm_info("Starting"); + mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, NULL, group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + GError *error = NULL; + crm_exit_t exit_code = CRM_EX_OK; + + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, NULL); + + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + /* We have to use crm_log_init here to set up the logging because there's + * different handling for daemons vs. command line programs, and + * pcmk__cli_init_logging is set up to only handle the latter. + */ + crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv, + FALSE); + + for (int i = 0; i < args->verbosity; i++) { + crm_bump_log_level(argc, argv); + } + + st = stonith_api_new(); + if (st == NULL) { + exit_code = CRM_EX_DISCONNECT; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Could not connect to fencer: API memory allocation failed"); + goto done; + } + + switch (options.mode) { + case test_standard: + standard_dev_test(); + break; + case test_passive: + passive_test(); + break; + case test_api_sanity: + sanity_tests(); + break; + case test_api_mainloop: + mainloop_tests(); + break; + } + + test_shutdown(0); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, NULL); + crm_exit(exit_code); +} diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c new file mode 100644 index 0000000..ba63cf8 --- /dev/null +++ b/daemons/fenced/fenced_commands.c @@ -0,0 +1,3674 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +GHashTable *device_list = NULL; +GHashTable *topology = NULL; +static GList *cmd_list = NULL; + +static GHashTable *fenced_handlers = NULL; + +struct device_search_s { + /* target of fence action */ + char *host; + /* requested fence action */ + char *action; + /* timeout to use if a device is queried dynamically for possible targets */ + int per_device_timeout; + /* number of registered fencing devices at time of request */ + int replies_needed; + /* number of device replies received so far */ + int replies_received; + /* whether the target is eligible to perform requested action (or off) */ + bool allow_suicide; + + /* private data to pass to search callback function */ + void *user_data; + /* function to call when all replies have been received */ + void (*callback) (GList * devices, void *user_data); + /* devices capable of performing requested action (or off if remapping) */ + GList *capable; + /* Whether to perform searches that support the action */ + uint32_t support_action_only; +}; + +static gboolean stonith_device_dispatch(gpointer user_data); +static void st_child_done(int pid, const pcmk__action_result_t *result, + void *user_data); +static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, + pcmk__client_t *client); + +static void search_devices_record_result(struct device_search_s *search, const char *device, + gboolean can_fence); + +static int get_agent_metadata(const char *agent, xmlNode **metadata); +static void read_action_metadata(stonith_device_t *device); +static enum fenced_target_by unpack_level_kind(const xmlNode *level); + +typedef struct async_command_s { + + int id; + int pid; + int fd_stdout; + int options; + int default_timeout; /* seconds */ + int timeout; /* seconds */ + + int start_delay; // seconds (-1 means disable static/random fencing delays) + int delay_id; + + char *op; + char *origin; + char *client; + char *client_name; + char *remote_op_id; + + char *target; + uint32_t target_nodeid; + char *action; + char *device; + + GList *device_list; + GList *next_device_iter; // device_list entry for next device to execute + + void *internal_user_data; + void (*done_cb) (int pid, const pcmk__action_result_t *result, + void *user_data); + guint timer_sigterm; + guint timer_sigkill; + /*! 
If the operation timed out, this is the last signal + * we sent to the process to get it to terminate */ + int last_timeout_signo; + + stonith_device_t *active_on; + stonith_device_t *activating_on; +} async_command_t; + +static xmlNode *construct_async_reply(const async_command_t *cmd, + const pcmk__action_result_t *result); + +static gboolean +is_action_required(const char *action, const stonith_device_t *device) +{ + return (device != NULL) && device->automatic_unfencing + && pcmk__str_eq(action, "on", pcmk__str_none); +} + +static int +get_action_delay_max(const stonith_device_t *device, const char *action) +{ + const char *value = NULL; + int delay_max = 0; + + if (!pcmk__is_fencing_action(action)) { + return 0; + } + + value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); + if (value) { + delay_max = crm_parse_interval_spec(value) / 1000; + } + + return delay_max; +} + +static int +get_action_delay_base(const stonith_device_t *device, const char *action, + const char *target) +{ + char *hash_value = NULL; + int delay_base = 0; + + if (!pcmk__is_fencing_action(action)) { + return 0; + } + + hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); + + if (hash_value) { + char *value = strdup(hash_value); + char *valptr = value; + + CRM_ASSERT(value != NULL); + + if (target != NULL) { + for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { + char *mapval = strchr(val, ':'); + + if (mapval == NULL || mapval[1] == 0) { + crm_err("pcmk_delay_base: empty value in mapping %s", val); + continue; + } + + if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { + value = mapval + 1; + crm_debug("pcmk_delay_base mapped to %s for %s", + value, target); + break; + } + } + } + + if (strchr(value, ':') == 0) { + delay_base = crm_parse_interval_spec(value) / 1000; + } + + free(valptr); + } + + return delay_base; +} + +/*! + * \internal + * \brief Override STONITH timeout with pcmk_*_timeout if available + * + * \param[in] device STONITH device to use + * \param[in] action STONITH action name + * \param[in] default_timeout Timeout to use if device does not have + * a pcmk_*_timeout parameter for action + * + * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout + * \note For consistency, it would be nice if reboot/off/on timeouts could be + * set the same way as start/stop/monitor timeouts, i.e. with an + * entry in the fencing resource configuration. However that + * is insufficient because fencing devices may be registered directly via + * the fencer's register_device() API instead of going through the CIB + * (e.g. stonith_admin uses it for its -R option, and the executor uses it + * to ensure a device is registered when a command is issued). As device + * properties, pcmk_*_timeout parameters can be grabbed by the fencer when + * the device is registered, whether by CIB change or API call.
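+ * + * \par Example (illustrative values, not from this patch) + * With pcmk_reboot_timeout=60 configured on a device, this returns 60 for + * "reboot" requests; for a device without reboot support, the request is + * remapped to "off", so pcmk_off_timeout is consulted instead.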
+ */ +static int +get_action_timeout(const stonith_device_t *device, const char *action, + int default_timeout) +{ + if (action && device && device->params) { + char buffer[64] = { 0, }; + const char *value = NULL; + + /* If "reboot" was requested but the device does not support it, + * we will remap to "off", so check timeout for "off" instead + */ + if (pcmk__str_eq(action, "reboot", pcmk__str_none) + && !pcmk_is_set(device->flags, st_device_supports_reboot)) { + crm_trace("%s doesn't support reboot, using timeout for off instead", + device->id); + action = "off"; + } + + /* If the device config specified an action-specific timeout, use it */ + snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); + value = g_hash_table_lookup(device->params, buffer); + if (value) { + return atoi(value); + } + } + return default_timeout; +} + +/*! + * \internal + * \brief Get the currently executing device for a fencing operation + * + * \param[in] cmd Fencing operation to check + * + * \return Currently executing device for \p cmd if any, otherwise NULL + */ +static stonith_device_t * +cmd_device(const async_command_t *cmd) +{ + if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { + return NULL; + } + return g_hash_table_lookup(device_list, cmd->device); +} + +/*! + * \internal + * \brief Return the configured reboot action for a given device + * + * \param[in] device_id Device ID + * + * \return Configured reboot action for \p device_id + */ +const char * +fenced_device_reboot_action(const char *device_id) +{ + const char *action = NULL; + + if ((device_list != NULL) && (device_id != NULL)) { + stonith_device_t *device = g_hash_table_lookup(device_list, device_id); + + if ((device != NULL) && (device->params != NULL)) { + action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); + } + } + return pcmk__s(action, "reboot"); +} + +/*! + * \internal + * \brief Check whether a given device supports the "on" action + * + * \param[in] device_id Device ID + * + * \return true if \p device_id supports "on", otherwise false + */ +bool +fenced_device_supports_on(const char *device_id) +{ + if ((device_list != NULL) && (device_id != NULL)) { + stonith_device_t *device = g_hash_table_lookup(device_list, device_id); + + if (device != NULL) { + return pcmk_is_set(device->flags, st_device_supports_on); + } + } + return false; +} + +static void +free_async_command(async_command_t * cmd) +{ + if (!cmd) { + return; + } + + if (cmd->delay_id) { + g_source_remove(cmd->delay_id); + } + + cmd_list = g_list_remove(cmd_list, cmd); + + g_list_free_full(cmd->device_list, free); + free(cmd->device); + free(cmd->action); + free(cmd->target); + free(cmd->remote_op_id); + free(cmd->client); + free(cmd->client_name); + free(cmd->origin); + free(cmd->op); + free(cmd); +} + +/*! + * \internal + * \brief Create a new asynchronous fencing operation from request XML + * + * \param[in] msg Fencing request XML (from IPC or CPG) + * + * \return Newly allocated fencing operation on success, otherwise NULL + * + * \note This asserts on memory errors, so a NULL return indicates an + * unparseable message. 
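
The pcmk_<action>_timeout convention that get_action_timeout() implements above can be shown in a few lines of plain GLib (compiles against glib-2.0). This is an editor's sketch: action_timeout() is a hypothetical helper, and in the daemon the values come from the device's registered parameter table.

#include <glib.h>
#include <stdio.h>
#include <stdlib.h>

// A device parameter such as "pcmk_reboot_timeout=120" overrides the
// default timeout for that one action; anything else falls through.
static int
action_timeout(GHashTable *params, const char *action, int default_timeout)
{
    char key[64];
    const char *value;

    g_snprintf(key, sizeof(key), "pcmk_%s_timeout", action);
    value = g_hash_table_lookup(params, key);
    return (value != NULL)? atoi(value) : default_timeout;
}

int
main(void)
{
    GHashTable *params = g_hash_table_new_full(g_str_hash, g_str_equal,
                                               g_free, g_free);

    g_hash_table_insert(params, g_strdup("pcmk_reboot_timeout"),
                        g_strdup("120"));
    printf("%d\n", action_timeout(params, "reboot", 60));  // prints 120
    printf("%d\n", action_timeout(params, "off", 60));     // prints 60
    g_hash_table_destroy(params);
    return 0;
}
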
+ */ +static async_command_t * +create_async_command(xmlNode *msg) +{ + xmlNode *op = NULL; + async_command_t *cmd = NULL; + + if (msg == NULL) { + return NULL; + } + + op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); + if (op == NULL) { + return NULL; + } + + cmd = calloc(1, sizeof(async_command_t)); + CRM_ASSERT(cmd != NULL); + + // All messages must include these + cmd->action = crm_element_value_copy(op, F_STONITH_ACTION); + cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); + cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); + if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { + free_async_command(cmd); + return NULL; + } + + crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); + crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); + crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); + crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); + cmd->timeout = cmd->default_timeout; + + cmd->origin = crm_element_value_copy(msg, F_ORIG); + cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); + cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); + cmd->target = crm_element_value_copy(op, F_STONITH_TARGET); + cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); + + cmd->done_cb = st_child_done; + + // Track in global command list + cmd_list = g_list_append(cmd_list, cmd); + + return cmd; +} + +static int +get_action_limit(stonith_device_t * device) +{ + const char *value = NULL; + int action_limit = 1; + + value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); + if ((value == NULL) + || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) + || (action_limit == 0)) { + action_limit = 1; + } + return action_limit; +} + +static int +get_active_cmds(stonith_device_t * device) +{ + int counter = 0; + GList *gIter = NULL; + GList *gIterNext = NULL; + + CRM_CHECK(device != NULL, return 0); + + for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { + async_command_t *cmd = gIter->data; + + gIterNext = gIter->next; + + if (cmd->active_on == device) { + counter++; + } + } + + return counter; +} + +static void +fork_cb(int pid, void *user_data) +{ + async_command_t *cmd = (async_command_t *) user_data; + stonith_device_t * device = + /* in case of a retry we've done the move from + activating_on to active_on already + */ + cmd->activating_on?cmd->activating_on:cmd->active_on; + + CRM_ASSERT(device); + crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", + cmd->action, pid, + ((cmd->target == NULL)? "" : " targeting "), + pcmk__s(cmd->target, ""), device->id, cmd->timeout); + cmd->active_on = device; + cmd->activating_on = NULL; +} + +static int +get_agent_metadata_cb(gpointer data) { + stonith_device_t *device = data; + guint period_ms; + + switch (get_agent_metadata(device->agent, &device->agent_metadata)) { + case pcmk_rc_ok: + if (device->agent_metadata) { + read_action_metadata(device); + stonith__device_parameter_flags(&(device->flags), device->id, + device->agent_metadata); + } + return G_SOURCE_REMOVE; + + case EAGAIN: + period_ms = pcmk__mainloop_timer_get_period(device->timer); + if (period_ms < 160 * 1000) { + mainloop_timer_set_period(device->timer, 2 * period_ms); + } + return G_SOURCE_CONTINUE; + + default: + return G_SOURCE_REMOVE; + } +} + +/*! 
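
get_agent_metadata_cb() above implements exponential backoff: each failed metadata fetch doubles the timer period until a cap (160 seconds) is reached, after which retries continue at the capped rate. A self-contained GLib sketch of the same pattern, with fetch_metadata() as a hypothetical stand-in for the agent query:

#include <glib.h>

static guint period_ms = 10 * 1000;   // initial retry period, as above
static GMainLoop *loop = NULL;

// Stand-in for the real metadata query; always failing here, so the
// sketch keeps retrying at the capped period, as the daemon would.
static gboolean fetch_metadata(void) { return FALSE; }

static gboolean
retry_cb(gpointer data)
{
    if (fetch_metadata()) {
        g_main_loop_quit(loop);
        return G_SOURCE_REMOVE;               // success: stop retrying
    }
    if (period_ms < 160 * 1000) {
        period_ms *= 2;                       // back off, capped like above
    }
    g_timeout_add(period_ms, retry_cb, data); // re-arm with longer period
    return G_SOURCE_REMOVE;                   // drop the old, shorter timer
}

int
main(void)
{
    loop = g_main_loop_new(NULL, FALSE);
    g_timeout_add(period_ms, retry_cb, NULL); // first attempt after 10s
    g_main_loop_run(loop);                    // runs until a fetch succeeds
    return 0;
}
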
+ * \internal
+ * \brief Call a command's action callback for an internal (not library) result
+ *
+ * \param[in,out] cmd               Command to report result for
+ * \param[in]     exit_status       Exit status to use for result
+ * \param[in]     execution_status  Execution status to use for result
+ * \param[in]     exit_reason       Exit reason to use for result
+ */
+static void
+report_internal_result(async_command_t *cmd, int exit_status,
+                       int execution_status, const char *exit_reason)
+{
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    pcmk__set_result(&result, exit_status, execution_status, exit_reason);
+    cmd->done_cb(0, &result, cmd);
+    pcmk__reset_result(&result);
+}
+
+static gboolean
+stonith_device_execute(stonith_device_t * device)
+{
+    int exec_rc = 0;
+    const char *action_str = NULL;
+    const char *host_arg = NULL;
+    async_command_t *cmd = NULL;
+    stonith_action_t *action = NULL;
+    int active_cmds = 0;
+    int action_limit = 0;
+    GList *gIter = NULL;
+    GList *gIterNext = NULL;
+
+    CRM_CHECK(device != NULL, return FALSE);
+
+    active_cmds = get_active_cmds(device);
+    action_limit = get_action_limit(device);
+    if (action_limit > -1 && active_cmds >= action_limit) {
+        crm_trace("%s is over its action limit of %d (%d active action%s)",
+                  device->id, action_limit, active_cmds,
+                  pcmk__plural_s(active_cmds));
+        return TRUE;
+    }
+
+    for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
+        async_command_t *pending_op = gIter->data;
+
+        gIterNext = gIter->next;
+
+        if (pending_op && pending_op->delay_id) {
+            crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
+                      "waiting for start delay of %ds",
+                      pending_op->action,
+                      ((pending_op->target == NULL)? "" : " targeting "),
+                      pcmk__s(pending_op->target, ""),
+                      device->id, pending_op->start_delay);
+            continue;
+        }
+
+        device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
+        g_list_free_1(gIter);
+
+        cmd = pending_op;
+        break;
+    }
+
+    if (cmd == NULL) {
+        crm_trace("No actions using %s are needed", device->id);
+        return TRUE;
+    }
+
+    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
+                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
+        if (pcmk__is_fencing_action(cmd->action)) {
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
+                pcmk__panic(__func__);
+                goto done;
+            }
+        } else {
+            crm_info("Faking success for %s watchdog operation", cmd->action);
+            report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+            goto done;
+        }
+    }
+
+#if SUPPORT_CIBSECRETS
+    exec_rc = pcmk__substitute_secrets(device->id, device->params);
+    if (exec_rc != pcmk_rc_ok) {
+        if (pcmk__str_eq(cmd->action, "stop", pcmk__str_none)) {
+            crm_info("Proceeding with stop operation for %s "
+                     "despite being unable to load CIB secrets (%s)",
+                     device->id, pcmk_rc_str(exec_rc));
+        } else {
+            crm_err("Considering %s unconfigured "
+                    "because unable to load CIB secrets: %s",
+                    device->id, pcmk_rc_str(exec_rc));
+            report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
+                                   "Failed to get CIB secrets");
+            goto done;
+        }
+    }
+#endif
+
+    action_str = cmd->action;
+    if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_none)
+        && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
+
+        crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
+                   "because agent '%s' does not support reboot",
+                   ((cmd->target == NULL)? "" : " targeting "),
+                   pcmk__s(cmd->target, ""), device->id, device->agent);
+        action_str = "off";
+    }
+
+    if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
+        host_arg = "port";
+
+    } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
+        host_arg = "plug";
+    }
+
+    action = stonith__action_create(device->agent, action_str, cmd->target,
+                                    cmd->target_nodeid, cmd->timeout,
+                                    device->params, device->aliases, host_arg);
+
+    /* For async execution, a negative exec_rc indicates an early error exit;
+     * otherwise, handling of success/errors is done via callbacks */
+    cmd->activating_on = device;
+    exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
+                                     fork_cb);
+    if (exec_rc < 0) {
+        cmd->activating_on = NULL;
+        cmd->done_cb(0, stonith__action_result(action), cmd);
+        stonith__destroy_action(action);
+    }
+
+done:
+    /* Device might get triggered to work by multiple fencing commands
+     * simultaneously. Trigger the device again to make sure any
+     * remaining concurrent commands get executed. */
+    if (device->pending_ops) {
+        mainloop_set_trigger(device->work);
+    }
+    return TRUE;
+}
+
+static gboolean
+stonith_device_dispatch(gpointer user_data)
+{
+    return stonith_device_execute(user_data);
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+    async_command_t *cmd = data;
+    stonith_device_t *device = cmd_device(cmd);
+
+    cmd->delay_id = 0;
+    if (device) {
+        mainloop_set_trigger(device->work);
+    }
+
+    return FALSE;
+}
+
+static void
+schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
+{
+    int delay_max = 0;
+    int delay_base = 0;
+    int requested_delay = cmd->start_delay;
+
+    CRM_CHECK(cmd != NULL, return);
+    CRM_CHECK(device != NULL, return);
+
+    if (cmd->device) {
+        free(cmd->device);
+    }
+
+    if (device->include_nodeid && (cmd->target != NULL)) {
+        crm_node_t *node = crm_get_peer(0, cmd->target);
+
+        cmd->target_nodeid = node->id;
+    }
+
+    cmd->device = strdup(device->id);
+    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
+
+    if (cmd->remote_op_id) {
+        crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
+                  "with op id %.8s and timeout %ds",
+                  cmd->action,
+                  (cmd->target == NULL)? "" : " targeting ",
+                  pcmk__s(cmd->target, ""),
+                  device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
+    } else {
+        crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
+                  cmd->action,
+                  (cmd->target == NULL)? "" : " targeting ",
+                  pcmk__s(cmd->target, ""),
+                  device->id, cmd->client, cmd->timeout);
+    }
+
+    device->pending_ops = g_list_append(device->pending_ops, cmd);
+    mainloop_set_trigger(device->work);
+
+    // Value -1 means disable any static/random fencing delays
+    if (requested_delay < 0) {
+        return;
+    }
+
+    delay_max = get_action_delay_max(device, cmd->action);
+    delay_base = get_action_delay_base(device, cmd->action, cmd->target);
+    if (delay_max == 0) {
+        delay_max = delay_base;
+    }
+    if (delay_max < delay_base) {
+        crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
+                 PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
+                 "(limiting to maximum delay)",
+                 delay_base, delay_max, cmd->action, device->id);
+        delay_base = delay_max;
+    }
+    if (delay_max > 0) {
+        // coverity[dont_call] We're not using rand() for security
+        cmd->start_delay +=
+            ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+            + delay_base;
+    }
+
+    if (cmd->start_delay > 0) {
+        crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
+                   " timeout=%ds requested_delay=%ds base=%ds max=%ds",
+                   cmd->action,
+                   (cmd->target == NULL)? "" : " targeting ",
+                   pcmk__s(cmd->target, ""),
+                   device->id, cmd->start_delay, cmd->timeout,
+                   requested_delay, delay_base, delay_max);
+        cmd->delay_id =
+            g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
+    }
+}
+
+static void
+free_device(gpointer data)
+{
+    GList *gIter = NULL;
+    stonith_device_t *device = data;
+
+    g_hash_table_destroy(device->params);
+    g_hash_table_destroy(device->aliases);
+
+    for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
+        async_command_t *cmd = gIter->data;
+
+        crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
+        report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                               "Device was removed before action could be executed");
+    }
+    g_list_free(device->pending_ops);
+
+    g_list_free_full(device->targets, free);
+
+    if (device->timer) {
+        mainloop_timer_stop(device->timer);
+        mainloop_timer_del(device->timer);
+    }
+
+    mainloop_destroy_trigger(device->work);
+
+    free_xml(device->agent_metadata);
+    free(device->namespace);
+    if (device->on_target_actions != NULL) {
+        g_string_free(device->on_target_actions, TRUE);
+    }
+    free(device->agent);
+    free(device->id);
+    free(device);
+}
+
+void free_device_list(void)
+{
+    if (device_list != NULL) {
+        g_hash_table_destroy(device_list);
+        device_list = NULL;
+    }
+}
+
+void
+init_device_list(void)
+{
+    if (device_list == NULL) {
+        device_list = pcmk__strkey_table(NULL, free_device);
+    }
+}
+
+static GHashTable *
+build_port_aliases(const char *hostmap, GList ** targets)
+{
+    char *name = NULL;
+    int last = 0, lpc = 0, max = 0, added = 0;
+    GHashTable *aliases = pcmk__strikey_table(free, free);
+
+    if (hostmap == NULL) {
+        return aliases;
+    }
+
+    max = strlen(hostmap);
+    for (; lpc <= max; lpc++) {
+        switch (hostmap[lpc]) {
+                /* Skip escaped chars */
+            case '\\':
+                lpc++;
+                break;
+
+                /* Assignment chars */
+            case '=':
+            case ':':
+                if (lpc > last) {
+                    free(name);
+                    name = calloc(1, 1 + lpc - last);
+                    memcpy(name, hostmap + last, lpc - last);
+                }
+                last = lpc + 1;
+                break;
+
+                /* Delimiter chars */
+                /* case ',': Potentially used to specify multiple ports */
+            case 0:
+            case ';':
+            case ' ':
+            case '\t':
+                if (name) {
+                    char *value = NULL;
+                    int k = 0;
+
+                    value = calloc(1, 1 + lpc - last);
+                    memcpy(value, hostmap + last, lpc - last);
+
+                    for (int i = 0; value[i] != '\0'; i++) {
+                        if (value[i] != '\\') {
+                            value[k++] = value[i];
+                        }
+
} + value[k] = '\0'; + + crm_debug("Adding alias '%s'='%s'", name, value); + g_hash_table_replace(aliases, name, value); + if (targets) { + *targets = g_list_append(*targets, strdup(value)); + } + value = NULL; + name = NULL; + added++; + + } else if (lpc > last) { + crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); + } + + last = lpc + 1; + break; + } + + if (hostmap[lpc] == 0) { + break; + } + } + + if (added == 0) { + crm_info("No host mappings detected in '%s'", hostmap); + } + + free(name); + return aliases; +} + +GHashTable *metadata_cache = NULL; + +void +free_metadata_cache(void) { + if (metadata_cache != NULL) { + g_hash_table_destroy(metadata_cache); + metadata_cache = NULL; + } +} + +static void +init_metadata_cache(void) { + if (metadata_cache == NULL) { + metadata_cache = pcmk__strkey_table(free, free); + } +} + +int +get_agent_metadata(const char *agent, xmlNode ** metadata) +{ + char *buffer = NULL; + + if (metadata == NULL) { + return EINVAL; + } + *metadata = NULL; + if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { + return pcmk_rc_ok; + } + init_metadata_cache(); + buffer = g_hash_table_lookup(metadata_cache, agent); + if (buffer == NULL) { + stonith_t *st = stonith_api_new(); + int rc; + + if (st == NULL) { + crm_warn("Could not get agent meta-data: " + "API memory allocation failed"); + return EAGAIN; + } + rc = st->cmds->metadata(st, st_opt_sync_call, agent, + NULL, &buffer, 10); + stonith_api_delete(st); + if (rc || !buffer) { + crm_err("Could not retrieve metadata for fencing agent %s", agent); + return EAGAIN; + } + g_hash_table_replace(metadata_cache, strdup(agent), buffer); + } + + *metadata = string2xml(buffer); + return pcmk_rc_ok; +} + +static gboolean +is_nodeid_required(xmlNode * xml) +{ + xmlXPathObjectPtr xpath = NULL; + + if (stand_alone) { + return FALSE; + } + + if (!xml) { + return FALSE; + } + + xpath = xpath_search(xml, "//parameter[@name='nodeid']"); + if (numXpathResults(xpath) <= 0) { + freeXpathObject(xpath); + return FALSE; + } + + freeXpathObject(xpath); + return TRUE; +} + +static void +read_action_metadata(stonith_device_t *device) +{ + xmlXPathObjectPtr xpath = NULL; + int max = 0; + int lpc = 0; + + if (device->agent_metadata == NULL) { + return; + } + + xpath = xpath_search(device->agent_metadata, "//action"); + max = numXpathResults(xpath); + + if (max <= 0) { + freeXpathObject(xpath); + return; + } + + for (lpc = 0; lpc < max; lpc++) { + const char *action = NULL; + xmlNode *match = getXpathResult(xpath, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match == NULL) { continue; }; + + action = crm_element_value(match, "name"); + + if (pcmk__str_eq(action, "list", pcmk__str_none)) { + stonith__set_device_flags(device->flags, device->id, + st_device_supports_list); + } else if (pcmk__str_eq(action, "status", pcmk__str_none)) { + stonith__set_device_flags(device->flags, device->id, + st_device_supports_status); + } else if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + stonith__set_device_flags(device->flags, device->id, + st_device_supports_reboot); + } else if (pcmk__str_eq(action, "on", pcmk__str_none)) { + /* "automatic" means the cluster will unfence node when it joins */ + /* "required" is a deprecated synonym for "automatic" */ + if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { + device->automatic_unfencing = TRUE; + } + stonith__set_device_flags(device->flags, device->id, + st_device_supports_on); + } + + if ((action != NULL) && 
pcmk__xe_attr_is_true(match, "on_target")) { + pcmk__add_word(&(device->on_target_actions), 64, action); + } + } + + freeXpathObject(xpath); +} + +/*! + * \internal + * \brief Set a pcmk_*_action parameter if not already set + * + * \param[in,out] params Device parameters + * \param[in] action Name of action + * \param[in] value Value to use if action is not already set + */ +static void +map_action(GHashTable *params, const char *action, const char *value) +{ + char *key = crm_strdup_printf("pcmk_%s_action", action); + + if (g_hash_table_lookup(params, key)) { + crm_warn("Ignoring %s='%s', see %s instead", + STONITH_ATTR_ACTION_OP, value, key); + free(key); + } else { + crm_warn("Mapping %s='%s' to %s='%s'", + STONITH_ATTR_ACTION_OP, value, key, value); + g_hash_table_insert(params, key, strdup(value)); + } +} + +/*! + * \internal + * \brief Create device parameter table from XML + * + * \param[in] name Device name (used for logging only) + * \param[in] dev XML containing device parameters + */ +static GHashTable * +xml2device_params(const char *name, const xmlNode *dev) +{ + GHashTable *params = xml2list(dev); + const char *value; + + /* Action should never be specified in the device configuration, + * but we support it for users who are familiar with other software + * that worked that way. + */ + value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); + if (value != NULL) { + crm_warn("%s has '%s' parameter, which should never be specified in configuration", + name, STONITH_ATTR_ACTION_OP); + + if (*value == '\0') { + crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); + + } else if (strcmp(value, "reboot") == 0) { + crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", + STONITH_ATTR_ACTION_OP); + + } else if (strcmp(value, "off") == 0) { + map_action(params, "reboot", value); + + } else { + map_action(params, "off", value); + map_action(params, "reboot", value); + } + + g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); + } + + return params; +} + +static const char * +target_list_type(stonith_device_t * dev) +{ + const char *check_type = NULL; + + check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); + + if (check_type == NULL) { + + if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { + check_type = "static-list"; + } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { + check_type = "static-list"; + } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { + check_type = "dynamic-list"; + } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { + check_type = "status"; + } else { + check_type = PCMK__VALUE_NONE; + } + } + + return check_type; +} + +static stonith_device_t * +build_device_from_xml(xmlNode *dev) +{ + const char *value; + stonith_device_t *device = NULL; + char *agent = crm_element_value_copy(dev, "agent"); + + CRM_CHECK(agent != NULL, return device); + + device = calloc(1, sizeof(stonith_device_t)); + + CRM_CHECK(device != NULL, {free(agent); return device;}); + + device->id = crm_element_value_copy(dev, XML_ATTR_ID); + device->agent = agent; + device->namespace = crm_element_value_copy(dev, "namespace"); + device->params = xml2device_params(device->id, dev); + + value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); + if (value) { + device->targets = stonith__parse_targets(value); + } + + value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); + device->aliases = build_port_aliases(value, &(device->targets)); + + value = 
target_list_type(device); + if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { + /* Other than "static-list", dev-> targets is unnecessary. */ + g_list_free_full(device->targets, free); + device->targets = NULL; + } + switch (get_agent_metadata(device->agent, &device->agent_metadata)) { + case pcmk_rc_ok: + if (device->agent_metadata) { + read_action_metadata(device); + stonith__device_parameter_flags(&(device->flags), device->id, + device->agent_metadata); + } + break; + + case EAGAIN: + if (device->timer == NULL) { + device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, + TRUE, get_agent_metadata_cb, device); + } + if (!mainloop_timer_running(device->timer)) { + mainloop_timer_start(device->timer); + } + break; + + default: + break; + } + + value = g_hash_table_lookup(device->params, "nodeid"); + if (!value) { + device->include_nodeid = is_nodeid_required(device->agent_metadata); + } + + value = crm_element_value(dev, "rsc_provides"); + if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) { + device->automatic_unfencing = TRUE; + } + + if (is_action_required("on", device)) { + crm_info("Fencing device '%s' requires unfencing", device->id); + } + + if (device->on_target_actions != NULL) { + crm_info("Fencing device '%s' requires actions (%s) to be executed " + "on target", device->id, + (const char *) device->on_target_actions->str); + } + + device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); + /* TODO: Hook up priority */ + + return device; +} + +static void +schedule_internal_command(const char *origin, + stonith_device_t * device, + const char *action, + const char *target, + int timeout, + void *internal_user_data, + void (*done_cb) (int pid, + const pcmk__action_result_t *result, + void *user_data)) +{ + async_command_t *cmd = NULL; + + cmd = calloc(1, sizeof(async_command_t)); + + cmd->id = -1; + cmd->default_timeout = timeout ? timeout : 60; + cmd->timeout = cmd->default_timeout; + cmd->action = strdup(action); + pcmk__str_update(&cmd->target, target); + cmd->device = strdup(device->id); + cmd->origin = strdup(origin); + cmd->client = strdup(crm_system_name); + cmd->client_name = strdup(crm_system_name); + + cmd->internal_user_data = internal_user_data; + cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ + + schedule_stonith_command(cmd, device); +} + +// Fence agent status commands use custom exit status codes +enum fence_status_code { + fence_status_invalid = -1, + fence_status_active = 0, + fence_status_unknown = 1, + fence_status_inactive = 2, +}; + +static void +status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) +{ + async_command_t *cmd = user_data; + struct device_search_s *search = cmd->internal_user_data; + stonith_device_t *dev = cmd_device(cmd); + gboolean can = FALSE; + + free_async_command(cmd); + + if (!dev) { + search_devices_record_result(search, NULL, FALSE); + return; + } + + mainloop_set_trigger(dev->work); + + if (result->execution_status != PCMK_EXEC_DONE) { + crm_warn("Assuming %s cannot fence %s " + "because status could not be executed: %s%s%s%s", + dev->id, search->host, + pcmk_exec_status_str(result->execution_status), + ((result->exit_reason == NULL)? "" : " ("), + ((result->exit_reason == NULL)? "" : result->exit_reason), + ((result->exit_reason == NULL)? 
"" : ")")); + search_devices_record_result(search, dev->id, FALSE); + return; + } + + switch (result->exit_status) { + case fence_status_unknown: + crm_trace("%s reported it cannot fence %s", dev->id, search->host); + break; + + case fence_status_active: + case fence_status_inactive: + crm_trace("%s reported it can fence %s", dev->id, search->host); + can = TRUE; + break; + + default: + crm_warn("Assuming %s cannot fence %s " + "(status returned unknown code %d)", + dev->id, search->host, result->exit_status); + break; + } + search_devices_record_result(search, dev->id, can); +} + +static void +dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, + void *user_data) +{ + async_command_t *cmd = user_data; + struct device_search_s *search = cmd->internal_user_data; + stonith_device_t *dev = cmd_device(cmd); + gboolean can_fence = FALSE; + + free_async_command(cmd); + + /* Host/alias must be in the list output to be eligible to be fenced + * + * Will cause problems if down'd nodes aren't listed or (for virtual nodes) + * if the guest is still listed despite being moved to another machine + */ + if (!dev) { + search_devices_record_result(search, NULL, FALSE); + return; + } + + mainloop_set_trigger(dev->work); + + if (pcmk__result_ok(result)) { + crm_info("Refreshing target list for %s", dev->id); + g_list_free_full(dev->targets, free); + dev->targets = stonith__parse_targets(result->action_stdout); + dev->targets_age = time(NULL); + + } else if (dev->targets != NULL) { + if (result->execution_status == PCMK_EXEC_DONE) { + crm_info("Reusing most recent target list for %s " + "because list returned error code %d", + dev->id, result->exit_status); + } else { + crm_info("Reusing most recent target list for %s " + "because list could not be executed: %s%s%s%s", + dev->id, pcmk_exec_status_str(result->execution_status), + ((result->exit_reason == NULL)? "" : " ("), + ((result->exit_reason == NULL)? "" : result->exit_reason), + ((result->exit_reason == NULL)? "" : ")")); + } + + } else { // We have never successfully executed list + if (result->execution_status == PCMK_EXEC_DONE) { + crm_warn("Assuming %s cannot fence %s " + "because list returned error code %d", + dev->id, search->host, result->exit_status); + } else { + crm_warn("Assuming %s cannot fence %s " + "because list could not be executed: %s%s%s%s", + dev->id, search->host, + pcmk_exec_status_str(result->execution_status), + ((result->exit_reason == NULL)? "" : " ("), + ((result->exit_reason == NULL)? "" : result->exit_reason), + ((result->exit_reason == NULL)? "" : ")")); + } + + /* Fall back to pcmk_host_check="status" if the user didn't explicitly + * specify "dynamic-list". + */ + if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { + crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); + g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK), + strdup("status")); + } + } + + if (dev->targets) { + const char *alias = g_hash_table_lookup(dev->aliases, search->host); + + if (!alias) { + alias = search->host; + } + if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { + can_fence = TRUE; + } + } + search_devices_record_result(search, dev->id, can_fence); +} + +/*! 
+ * \internal + * \brief Returns true if any key in first is not in second or second has a different value for key + */ +static int +device_params_diff(GHashTable *first, GHashTable *second) { + char *key = NULL; + char *value = NULL; + GHashTableIter gIter; + + g_hash_table_iter_init(&gIter, first); + while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { + + if(strstr(key, "CRM_meta") == key) { + continue; + } else if(strcmp(key, "crm_feature_set") == 0) { + continue; + } else { + char *other_value = g_hash_table_lookup(second, key); + + if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { + crm_trace("Different value for %s: %s != %s", key, other_value, value); + return 1; + } + } + } + + return 0; +} + +/*! + * \internal + * \brief Checks to see if an identical device already exists in the device_list + */ +static stonith_device_t * +device_has_duplicate(const stonith_device_t *device) +{ + stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); + + if (!dup) { + crm_trace("No match for %s", device->id); + return NULL; + + } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { + crm_trace("Different agent: %s != %s", dup->agent, device->agent); + return NULL; + } + + /* Use calculate_operation_digest() here? */ + if (device_params_diff(device->params, dup->params) || + device_params_diff(dup->params, device->params)) { + return NULL; + } + + crm_trace("Match"); + return dup; +} + +int +stonith_device_register(xmlNode *dev, gboolean from_cib) +{ + stonith_device_t *dup = NULL; + stonith_device_t *device = build_device_from_xml(dev); + guint ndevices = 0; + int rv = pcmk_ok; + + CRM_CHECK(device != NULL, return -ENOMEM); + + /* do we have a watchdog-device? */ + if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || + pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, + STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { + if (stonith_watchdog_timeout_ms <= 0) { + crm_err("Ignoring watchdog fence device without " + "stonith-watchdog-timeout set."); + rv = -ENODEV; + /* fall through to cleanup & return */ + } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, + STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { + crm_err("Ignoring watchdog fence device with unknown " + "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", + device->agent?device->agent:""); + rv = -ENODEV; + /* fall through to cleanup & return */ + } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, + pcmk__str_none)) { + crm_err("Ignoring watchdog fence device " + "named %s !='"STONITH_WATCHDOG_ID"'.", + device->id?device->id:""); + rv = -ENODEV; + /* fall through to cleanup & return */ + } else { + if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, + pcmk__str_none)) { + /* this either has an empty list or the targets + configured for watchdog-fencing + */ + g_list_free_full(stonith_watchdog_targets, free); + stonith_watchdog_targets = device->targets; + device->targets = NULL; + } + if (node_does_watchdog_fencing(stonith_our_uname)) { + g_list_free_full(device->targets, free); + device->targets = stonith__parse_targets(stonith_our_uname); + g_hash_table_replace(device->params, + strdup(PCMK_STONITH_HOST_LIST), + strdup(stonith_our_uname)); + /* proceed as with any other stonith-device */ + break; + } + + crm_debug("Skip registration of watchdog fence device on node not in host-list."); + /* cleanup and fall through to more cleanup and return */ + device->targets = NULL; + stonith_device_remove(device->id, from_cib); + } + 
free_device(device); + return rv; + } while (0); + + dup = device_has_duplicate(device); + if (dup) { + ndevices = g_hash_table_size(device_list); + crm_debug("Device '%s' already in device list (%d active device%s)", + device->id, ndevices, pcmk__plural_s(ndevices)); + free_device(device); + device = dup; + dup = g_hash_table_lookup(device_list, device->id); + dup->dirty = FALSE; + + } else { + stonith_device_t *old = g_hash_table_lookup(device_list, device->id); + + if (from_cib && old && old->api_registered) { + /* If the cib is writing over an entry that is shared with a stonith client, + * copy any pending ops that currently exist on the old entry to the new one. + * Otherwise the pending ops will be reported as failures + */ + crm_info("Overwriting existing entry for %s from CIB", device->id); + device->pending_ops = old->pending_ops; + device->api_registered = TRUE; + old->pending_ops = NULL; + if (device->pending_ops) { + mainloop_set_trigger(device->work); + } + } + g_hash_table_replace(device_list, device->id, device); + + ndevices = g_hash_table_size(device_list); + crm_notice("Added '%s' to device list (%d active device%s)", + device->id, ndevices, pcmk__plural_s(ndevices)); + } + + if (from_cib) { + device->cib_registered = TRUE; + } else { + device->api_registered = TRUE; + } + + return pcmk_ok; +} + +void +stonith_device_remove(const char *id, bool from_cib) +{ + stonith_device_t *device = g_hash_table_lookup(device_list, id); + guint ndevices = 0; + + if (!device) { + ndevices = g_hash_table_size(device_list); + crm_info("Device '%s' not found (%d active device%s)", + id, ndevices, pcmk__plural_s(ndevices)); + return; + } + + if (from_cib) { + device->cib_registered = FALSE; + } else { + device->verified = FALSE; + device->api_registered = FALSE; + } + + if (!device->cib_registered && !device->api_registered) { + g_hash_table_remove(device_list, id); + ndevices = g_hash_table_size(device_list); + crm_info("Removed '%s' from device list (%d active device%s)", + id, ndevices, pcmk__plural_s(ndevices)); + } else { + crm_trace("Not removing '%s' from device list (%d active) because " + "still registered via:%s%s", + id, g_hash_table_size(device_list), + (device->cib_registered? " cib" : ""), + (device->api_registered? " api" : "")); + } +} + +/*! + * \internal + * \brief Return the number of stonith levels registered for a node + * + * \param[in] tp Node's topology table entry + * + * \return Number of non-NULL levels in topology entry + * \note This function is used only for log messages. 
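
The registration logic above keeps two independent claims on a device, one from the CIB and one from the fencer API, and only truly drops the device once neither source still claims it. An editor's sketch of that rule in isolation:

#include <stdbool.h>
#include <stdio.h>

// Mirrors the flag handling in stonith_device_register() and
// stonith_device_remove(): a device may be registered from the CIB and
// from the API at the same time.
struct dev {
    bool cib_registered;
    bool api_registered;
};

// Clear one source's claim; returns true if the device should now be
// removed from device_list (no source claims it anymore).
static bool
unregister_source(struct dev *d, bool from_cib)
{
    if (from_cib) {
        d->cib_registered = false;
    } else {
        d->api_registered = false;
    }
    return !d->cib_registered && !d->api_registered;
}

int
main(void)
{
    struct dev d = { .cib_registered = true, .api_registered = true };

    printf("%d\n", unregister_source(&d, true));   // 0: API still holds it
    printf("%d\n", unregister_source(&d, false));  // 1: last claim released
    return 0;
}
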
+ */ +static int +count_active_levels(const stonith_topology_t *tp) +{ + int lpc = 0; + int count = 0; + + for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { + if (tp->levels[lpc] != NULL) { + count++; + } + } + return count; +} + +static void +free_topology_entry(gpointer data) +{ + stonith_topology_t *tp = data; + + int lpc = 0; + + for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { + if (tp->levels[lpc] != NULL) { + g_list_free_full(tp->levels[lpc], free); + } + } + free(tp->target); + free(tp->target_value); + free(tp->target_pattern); + free(tp->target_attribute); + free(tp); +} + +void +free_topology_list(void) +{ + if (topology != NULL) { + g_hash_table_destroy(topology); + topology = NULL; + } +} + +void +init_topology_list(void) +{ + if (topology == NULL) { + topology = pcmk__strkey_table(NULL, free_topology_entry); + } +} + +char * +stonith_level_key(const xmlNode *level, enum fenced_target_by mode) +{ + if (mode == fenced_target_by_unknown) { + mode = unpack_level_kind(level); + } + switch (mode) { + case fenced_target_by_name: + return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); + + case fenced_target_by_pattern: + return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); + + case fenced_target_by_attribute: + return crm_strdup_printf("%s=%s", + crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE), + crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE)); + + default: + return crm_strdup_printf("unknown-%s", ID(level)); + } +} + +/*! + * \internal + * \brief Parse target identification from topology level XML + * + * \param[in] level Topology level XML to parse + * + * \return How to identify target of \p level + */ +static enum fenced_target_by +unpack_level_kind(const xmlNode *level) +{ + if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) { + return fenced_target_by_name; + } + if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) { + return fenced_target_by_pattern; + } + if (!stand_alone /* if standalone, there's no attribute manager */ + && (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL) + && (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) { + return fenced_target_by_attribute; + } + return fenced_target_by_unknown; +} + +static stonith_key_value_t * +parse_device_list(const char *devices) +{ + int lpc = 0; + int max = 0; + int last = 0; + stonith_key_value_t *output = NULL; + + if (devices == NULL) { + return output; + } + + max = strlen(devices); + for (lpc = 0; lpc <= max; lpc++) { + if (devices[lpc] == ',' || devices[lpc] == 0) { + char *line = strndup(devices + last, lpc - last); + + output = stonith_key_value_add(output, NULL, line); + free(line); + + last = lpc + 1; + } + } + + return output; +} + +/*! + * \internal + * \brief Unpack essential information from topology request XML + * + * \param[in] xml Request XML to search + * \param[out] mode If not NULL, where to store level kind + * \param[out] target If not NULL, where to store representation of target + * \param[out] id If not NULL, where to store level number + * \param[out] desc If not NULL, where to store log-friendly level description + * + * \return Topology level XML from within \p xml, or NULL if not found + * \note The caller is responsible for freeing \p *target and \p *desc if set. 
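
parse_device_list() above splits a level's comma-separated device string by walking up to and including the terminating NUL, so the last segment needs no special case. The same idea as a standalone editor's sketch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Split "ipmi1,ipmi2" on commas; visiting the trailing NUL makes the
// final segment fall out of the same branch as the others.
static void
print_devices(const char *devices)
{
    int last = 0;
    int max = (int) strlen(devices);

    for (int lpc = 0; lpc <= max; lpc++) {
        if (devices[lpc] == ',' || devices[lpc] == '\0') {
            char *one = strndup(devices + last, lpc - last);

            printf("device: %s\n", one);
            free(one);
            last = lpc + 1;
        }
    }
}

int
main(void)
{
    print_devices("ipmi1,ipmi2");  // prints ipmi1, then ipmi2
    return 0;
}
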
+ */ +static xmlNode * +unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, + int *id, char **desc) +{ + enum fenced_target_by local_mode = fenced_target_by_unknown; + char *local_target = NULL; + int local_id = 0; + + /* The level element can be the top element or lower. If top level, don't + * search by xpath, because it might give multiple hits if the XML is the + * CIB. + */ + if ((xml != NULL) + && !pcmk__str_eq(TYPE(xml), XML_TAG_FENCING_LEVEL, pcmk__str_none)) { + xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING); + } + + if (xml == NULL) { + if (desc != NULL) { + *desc = crm_strdup_printf("missing"); + } + } else { + local_mode = unpack_level_kind(xml); + local_target = stonith_level_key(xml, local_mode); + crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &local_id); + if (desc != NULL) { + *desc = crm_strdup_printf("%s[%d]", local_target, local_id); + } + } + + if (mode != NULL) { + *mode = local_mode; + } + if (id != NULL) { + *id = local_id; + } + + if (target != NULL) { + *target = local_target; + } else { + free(local_target); + } + + return xml; +} + +/*! + * \internal + * \brief Register a fencing topology level for a target + * + * Given an XML request specifying the target name, level index, and device IDs + * for the level, this will create an entry for the target in the global topology + * table if one does not already exist, then append the specified device IDs to + * the entry's device list for the specified level. + * + * \param[in] msg XML request for STONITH level registration + * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" + * \param[out] result Where to set result of registration + */ +void +fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) +{ + int id = 0; + xmlNode *level; + enum fenced_target_by mode; + char *target; + + stonith_topology_t *tp; + stonith_key_value_t *dIter = NULL; + stonith_key_value_t *devices = NULL; + + CRM_CHECK((msg != NULL) && (result != NULL), return); + + level = unpack_level_request(msg, &mode, &target, &id, desc); + if (level == NULL) { + fenced_set_protocol_error(result); + return; + } + + // Ensure an ID was given (even the client API adds an ID) + if (pcmk__str_empty(ID(level))) { + crm_warn("Ignoring registration for topology level without ID"); + free(target); + crm_log_xml_trace(level, "Bad level"); + pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, + "Topology level is invalid without ID"); + return; + } + + // Ensure a valid target was specified + if (mode == fenced_target_by_unknown) { + crm_warn("Ignoring registration for topology level '%s' " + "without valid target", ID(level)); + free(target); + crm_log_xml_trace(level, "Bad level"); + pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, + "Invalid target for topology level '%s'", + ID(level)); + return; + } + + // Ensure level ID is in allowed range + if ((id <= 0) || (id >= ST_LEVEL_MAX)) { + crm_warn("Ignoring topology registration for %s with invalid level %d", + target, id); + free(target); + crm_log_xml_trace(level, "Bad level"); + pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, + "Invalid level number '%s' for topology level '%s'", + pcmk__s(crm_element_value(level, + XML_ATTR_STONITH_INDEX), + ""), + ID(level)); + return; + } + + /* Find or create topology table entry */ + tp = g_hash_table_lookup(topology, target); + if (tp == NULL) { + tp = calloc(1, sizeof(stonith_topology_t)); + if (tp == NULL) { 
+ pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + strerror(ENOMEM)); + free(target); + return; + } + tp->kind = mode; + tp->target = target; + tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); + tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); + tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); + + g_hash_table_replace(topology, tp->target, tp); + crm_trace("Added %s (%d) to the topology (%d active entries)", + target, (int) mode, g_hash_table_size(topology)); + } else { + free(target); + } + + if (tp->levels[id] != NULL) { + crm_info("Adding to the existing %s[%d] topology entry", + tp->target, id); + } + + devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); + for (dIter = devices; dIter; dIter = dIter->next) { + const char *device = dIter->value; + + crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); + tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); + } + stonith_key_value_freeall(devices, 1, 1); + + { + int nlevels = count_active_levels(tp); + + crm_info("Target %s has %d active fencing level%s", + tp->target, nlevels, pcmk__plural_s(nlevels)); + } + + pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +} + +/*! + * \internal + * \brief Unregister a fencing topology level for a target + * + * Given an XML request specifying the target name and level index (or 0 for all + * levels), this will remove any corresponding entry for the target from the + * global topology table. + * + * \param[in] msg XML request for STONITH level registration + * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" + * \param[out] result Where to set result of unregistration + */ +void +fenced_unregister_level(xmlNode *msg, char **desc, + pcmk__action_result_t *result) +{ + int id = -1; + stonith_topology_t *tp; + char *target; + xmlNode *level = NULL; + + CRM_CHECK(result != NULL, return); + + level = unpack_level_request(msg, NULL, &target, &id, desc); + if (level == NULL) { + fenced_set_protocol_error(result); + return; + } + + // Ensure level ID is in allowed range + if ((id < 0) || (id >= ST_LEVEL_MAX)) { + crm_warn("Ignoring topology unregistration for %s with invalid level %d", + target, id); + free(target); + crm_log_xml_trace(level, "Bad level"); + pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, + "Invalid level number '%s' for topology level %s", + pcmk__s(crm_element_value(level, + XML_ATTR_STONITH_INDEX), + ""), + + // Client API doesn't add ID to unregistration XML + pcmk__s(ID(level), "")); + return; + } + + tp = g_hash_table_lookup(topology, target); + if (tp == NULL) { + guint nentries = g_hash_table_size(topology); + + crm_info("No fencing topology found for %s (%d active %s)", + target, nentries, + pcmk__plural_alt(nentries, "entry", "entries")); + + } else if (id == 0 && g_hash_table_remove(topology, target)) { + guint nentries = g_hash_table_size(topology); + + crm_info("Removed all fencing topology entries related to %s " + "(%d active %s remaining)", target, nentries, + pcmk__plural_alt(nentries, "entry", "entries")); + + } else if (tp->levels[id] != NULL) { + guint nlevels; + + g_list_free_full(tp->levels[id], free); + tp->levels[id] = NULL; + + nlevels = count_active_levels(tp); + crm_info("Removed level %d from fencing topology for %s " + "(%d active level%s remaining)", + id, target, nlevels, pcmk__plural_s(nlevels)); + } + + free(target); + 
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +} + +static char * +list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) +{ + int max = g_list_length(list); + size_t delim_len = delim?strlen(delim):0; + size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); + char *rv; + GList *gIter; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + const char *value = (const char *) gIter->data; + + alloc_size += strlen(value); + } + rv = calloc(alloc_size, sizeof(char)); + if (rv) { + char *pos = rv; + const char *lead_delim = ""; + + for (gIter = list; gIter != NULL; gIter = gIter->next) { + const char *value = (const char *) gIter->data; + + pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; + lead_delim = delim; + } + if (max && terminate_with_delim) { + sprintf(pos, "%s", delim); + } + } + return rv; +} + +/*! + * \internal + * \brief Execute a fence agent action directly (and asynchronously) + * + * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action + * directly on a specified device. Only list, monitor, and status actions are + * expected to use this call, though it should work with any agent command. + * + * \param[in] msg Request XML specifying action + * \param[out] result Where to store result of action + * + * \note If the action is monitor, the device must be registered via the API + * (CIB registration is not sufficient), because monitor should not be + * possible unless the device is "started" (API registered). + */ +static void +execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) +{ + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); + xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); + const char *id = crm_element_value(dev, F_STONITH_DEVICE); + const char *action = crm_element_value(op, F_STONITH_ACTION); + async_command_t *cmd = NULL; + stonith_device_t *device = NULL; + + if ((id == NULL) || (action == NULL)) { + crm_info("Malformed API action request: device %s, action %s", + (id? id : "not specified"), + (action? 
action : "not specified")); + fenced_set_protocol_error(result); + return; + } + + if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { + // Watchdog agent actions are implemented internally + if (stonith_watchdog_timeout_ms <= 0) { + pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "Watchdog fence device not configured"); + return; + + } else if (pcmk__str_eq(action, "list", pcmk__str_none)) { + pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + pcmk__set_result_output(result, + list_to_string(stonith_watchdog_targets, + "\n", TRUE), + NULL); + return; + + } else if (pcmk__str_eq(action, "monitor", pcmk__str_none)) { + pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return; + } + } + + device = g_hash_table_lookup(device_list, id); + if (device == NULL) { + crm_info("Ignoring API '%s' action request because device %s not found", + action, id); + pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "'%s' not found", id); + return; + + } else if (!device->api_registered && !strcmp(action, "monitor")) { + // Monitors may run only on "started" (API-registered) devices + crm_info("Ignoring API '%s' action request because device %s not active", + action, id); + pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "'%s' not active", id); + return; + } + + cmd = create_async_command(msg); + if (cmd == NULL) { + crm_log_xml_warn(msg, "invalid"); + fenced_set_protocol_error(result); + return; + } + + schedule_stonith_command(cmd, device); + pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); +} + +static void +search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) +{ + search->replies_received++; + if (can_fence && device) { + if (search->support_action_only != st_device_supports_none) { + stonith_device_t *dev = g_hash_table_lookup(device_list, device); + if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { + return; + } + } + search->capable = g_list_append(search->capable, strdup(device)); + } + + if (search->replies_needed == search->replies_received) { + + guint ndevices = g_list_length(search->capable); + + crm_debug("Search found %d device%s that can perform '%s' targeting %s", + ndevices, pcmk__plural_s(ndevices), + (search->action? search->action : "unknown action"), + (search->host? search->host : "any node")); + + search->callback(search->capable, search->user_data); + free(search->host); + free(search->action); + free(search); + } +} + +/*! 
+ * \internal + * \brief Check whether the local host is allowed to execute a fencing action + * + * \param[in] device Fence device to check + * \param[in] action Fence action to check + * \param[in] target Hostname of fence target + * \param[in] allow_suicide Whether self-fencing is allowed for this operation + * + * \return TRUE if local host is allowed to execute action, FALSE otherwise + */ +static gboolean +localhost_is_eligible(const stonith_device_t *device, const char *action, + const char *target, gboolean allow_suicide) +{ + gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, + pcmk__str_casei); + + if ((device != NULL) && (action != NULL) + && (device->on_target_actions != NULL) + && (strstr((const char*) device->on_target_actions->str, + action) != NULL)) { + + if (!localhost_is_target) { + crm_trace("Operation '%s' using %s can only be executed for local " + "host, not %s", action, device->id, target); + return FALSE; + } + + } else if (localhost_is_target && !allow_suicide) { + crm_trace("'%s' operation does not support self-fencing", action); + return FALSE; + } + return TRUE; +} + +/*! + * \internal + * \brief Check if local node is allowed to execute (possibly remapped) action + * + * \param[in] device Fence device to check + * \param[in] action Fence action to check + * \param[in] target Node name of fence target + * \param[in] allow_self Whether self-fencing is allowed for this operation + * + * \return true if local node is allowed to execute \p action or any actions it + * might be remapped to, otherwise false + */ +static bool +localhost_is_eligible_with_remap(const stonith_device_t *device, + const char *action, const char *target, + gboolean allow_self) +{ + // Check exact action + if (localhost_is_eligible(device, action, target, allow_self)) { + return true; + } + + // Check potential remaps + + if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + /* "reboot" might get remapped to "off" then "on", so even if reboot is + * disallowed, return true if either of those is allowed. We'll report + * the disallowed actions with the results. We never allow self-fencing + * for remapped "on" actions because the target is off at that point. + */ + if (localhost_is_eligible(device, "off", target, allow_self) + || localhost_is_eligible(device, "on", target, FALSE)) { + return true; + } + } + + return false; +} + +static void +can_fence_host_with_device(stonith_device_t *dev, + struct device_search_s *search) +{ + gboolean can = FALSE; + const char *check_type = "Internal bug"; + const char *target = NULL; + const char *alias = NULL; + const char *dev_id = "Unspecified device"; + const char *action = (search == NULL)? 
NULL : search->action; + + CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); + + if (dev->id != NULL) { + dev_id = dev->id; + } + + target = search->host; + if (target == NULL) { + can = TRUE; + check_type = "No target"; + goto search_report_results; + } + + /* Answer immediately if the device does not support the action + * or the local node is not allowed to perform it + */ + if (pcmk__str_eq(action, "on", pcmk__str_none) + && !pcmk_is_set(dev->flags, st_device_supports_on)) { + check_type = "Agent does not support 'on'"; + goto search_report_results; + + } else if (!localhost_is_eligible_with_remap(dev, action, target, + search->allow_suicide)) { + check_type = "This node is not allowed to execute action"; + goto search_report_results; + } + + // Check eligibility as specified by pcmk_host_check + check_type = target_list_type(dev); + alias = g_hash_table_lookup(dev->aliases, target); + if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) { + can = TRUE; + + } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { + if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { + can = TRUE; + } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) + && g_hash_table_lookup(dev->aliases, target)) { + can = TRUE; + } + + } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { + time_t now = time(NULL); + + if (dev->targets == NULL || dev->targets_age + 60 < now) { + int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout); + + if (device_timeout > search->per_device_timeout) { + crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur", + device_timeout, dev_id, search->per_device_timeout); + } + + crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", + check_type, dev_id, target, action); + + schedule_internal_command(__func__, dev, "list", NULL, + search->per_device_timeout, search, dynamic_list_search_cb); + + /* we'll respond to this search request async in the cb */ + return; + } + + if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, + pcmk__str_casei)) { + can = TRUE; + } + + } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { + int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); + + if (device_timeout > search->per_device_timeout) { + crm_notice("Since the pcmk_status_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur", + device_timeout, dev_id, search->per_device_timeout); + } + + crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", + check_type, dev_id, target, action); + schedule_internal_command(__func__, dev, "status", target, + search->per_device_timeout, search, status_search_cb); + /* we'll respond to this search request async in the cb */ + return; + } else { + crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); + check_type = "Invalid " PCMK_STONITH_HOST_CHECK; + } + + search_report_results: + crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", + dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), + pcmk__s(target, "unspecified target"), + (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), + (alias == NULL)? "" : "')", check_type); + search_devices_record_result(search, ((dev == NULL)? 
NULL : dev_id), can); +} + +static void +search_devices(gpointer key, gpointer value, gpointer user_data) +{ + stonith_device_t *dev = value; + struct device_search_s *search = user_data; + + can_fence_host_with_device(dev, search); +} + +#define DEFAULT_QUERY_TIMEOUT 20 +static void +get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, + void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) +{ + struct device_search_s *search; + guint ndevices = g_hash_table_size(device_list); + + if (ndevices == 0) { + callback(NULL, user_data); + return; + } + + search = calloc(1, sizeof(struct device_search_s)); + if (!search) { + crm_crit("Cannot search for capable fence devices: %s", + strerror(ENOMEM)); + callback(NULL, user_data); + return; + } + + pcmk__str_update(&search->host, host); + pcmk__str_update(&search->action, action); + search->per_device_timeout = timeout; + search->allow_suicide = suicide; + search->callback = callback; + search->user_data = user_data; + search->support_action_only = support_action_only; + + /* We are guaranteed this many replies, even if a device is + * unregistered while the search is in progress. + */ + search->replies_needed = ndevices; + + crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", + ndevices, pcmk__plural_s(ndevices), + (search->action? search->action : "unknown action"), + (search->host? search->host : "any node")); + g_hash_table_foreach(device_list, search_devices, search); +} + +struct st_query_data { + xmlNode *reply; + char *remote_peer; + char *client_id; + char *target; + char *action; + int call_options; +}; + +/*! + * \internal + * \brief Add action-specific attributes to query reply XML + * + * \param[in,out] xml XML to add attributes to + * \param[in] action Fence action + * \param[in] device Fence device + * \param[in] target Fence target + */ +static void +add_action_specific_attributes(xmlNode *xml, const char *action, + const stonith_device_t *device, + const char *target) +{ + int action_specific_timeout; + int delay_max; + int delay_base; + + CRM_CHECK(xml && action && device, return); + + if (is_action_required(action, device)) { + crm_trace("Action '%s' is required using %s", action, device->id); + crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); + } + + action_specific_timeout = get_action_timeout(device, action, 0); + if (action_specific_timeout) { + crm_trace("Action '%s' has timeout %dms using %s", + action, action_specific_timeout, device->id); + crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); + } + + delay_max = get_action_delay_max(device, action); + if (delay_max > 0) { + crm_trace("Action '%s' has maximum random delay %ds using %s", + action, delay_max, device->id); + crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max); + } + + delay_base = get_action_delay_base(device, action, target); + if (delay_base > 0) { + crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base); + } + + if ((delay_max > 0) && (delay_base == 0)) { + crm_trace("Action '%s' has maximum random delay %ds using %s", + action, delay_max, device->id); + } else if ((delay_max == 0) && (delay_base > 0)) { + crm_trace("Action '%s' has a static delay of %ds using %s", + action, delay_base, device->id); + } else if ((delay_max > 0) && (delay_base > 0)) { + crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " + "maximum delay of %ds using %s", + action, delay_base, delay_max, device->id); + } +} + +/*! 
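+ * (Aside: the search fan-out above guarantees exactly one recorded reply per
+ * registered device, with the callback fired when the count reaches zero. A
+ * minimal standalone sketch of that countdown pattern follows; all names are
+ * invented for illustration and are not Pacemaker API.)
+ * \code
+ * #include <stdio.h>
+ * #include <stdbool.h>
+ *
+ * struct demo_search { int replies_needed; int eligible; };
+ *
+ * // Called once per device, possibly from an asynchronous callback
+ * static void demo_record_result(struct demo_search *s, bool can_fence)
+ * {
+ *     if (can_fence) {
+ *         s->eligible++;
+ *     }
+ *     if (--s->replies_needed == 0) {  // last outstanding reply
+ *         printf("search finished: %d eligible device(s)\n", s->eligible);
+ *     }
+ * }
+ * \endcode
+ *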
+ * \internal + * \brief Add "disallowed" attribute to query reply XML if appropriate + * + * \param[in,out] xml XML to add attribute to + * \param[in] action Fence action + * \param[in] device Fence device + * \param[in] target Fence target + * \param[in] allow_suicide Whether self-fencing is allowed + */ +static void +add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, + const char *target, gboolean allow_suicide) +{ + if (!localhost_is_eligible(device, action, target, allow_suicide)) { + crm_trace("Action '%s' using %s is disallowed for local host", + action, device->id); + pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true); + } +} + +/*! + * \internal + * \brief Add child element with action-specific values to query reply XML + * + * \param[in,out] xml XML to add attribute to + * \param[in] action Fence action + * \param[in] device Fence device + * \param[in] target Fence target + * \param[in] allow_suicide Whether self-fencing is allowed + */ +static void +add_action_reply(xmlNode *xml, const char *action, + const stonith_device_t *device, const char *target, + gboolean allow_suicide) +{ + xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); + + crm_xml_add(child, XML_ATTR_ID, action); + add_action_specific_attributes(child, action, device, target); + add_disallowed(child, action, device, target, allow_suicide); +} + +static void +stonith_query_capable_device_cb(GList * devices, void *user_data) +{ + struct st_query_data *query = user_data; + int available_devices = 0; + xmlNode *dev = NULL; + xmlNode *list = NULL; + GList *lpc = NULL; + pcmk__client_t *client = NULL; + + if (query->client_id != NULL) { + client = pcmk__find_client_by_id(query->client_id); + if ((client == NULL) && (query->remote_peer == NULL)) { + crm_trace("Skipping reply to %s: no longer a client", + query->client_id); + goto done; + } + } + + /* Pack the results into XML */ + list = create_xml_node(NULL, __func__); + crm_xml_add(list, F_STONITH_TARGET, query->target); + for (lpc = devices; lpc != NULL; lpc = lpc->next) { + stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); + const char *action = query->action; + + if (!device) { + /* It is possible the device got unregistered while + * determining who can fence the target */ + continue; + } + + available_devices++; + + dev = create_xml_node(list, F_STONITH_DEVICE); + crm_xml_add(dev, XML_ATTR_ID, device->id); + crm_xml_add(dev, "namespace", device->namespace); + crm_xml_add(dev, "agent", device->agent); + crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); + crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags); + + /* If the originating fencer wants to reboot the node, and we have a + * capable device that doesn't support "reboot", remap to "off" instead. + */ + if (!pcmk_is_set(device->flags, st_device_supports_reboot) + && pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + crm_trace("%s doesn't support reboot, using values for off instead", + device->id); + action = "off"; + } + + /* Add action-specific values if available */ + add_action_specific_attributes(dev, action, device, query->target); + if (pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + /* A "reboot" *might* get remapped to "off" then "on", so after + * sending the "reboot"-specific values in the main element, we add + * sub-elements for "off" and "on" values. + * + * We short-circuited earlier if "reboot", "off" and "on" are all + * disallowed for the local host. 
However if only one or two are + * disallowed, we send back the results and mark which ones are + * disallowed. If "reboot" is disallowed, this might cause problems + * with older fencer versions, which won't check for it. Older + * versions will ignore "off" and "on", so they are not a problem. + */ + add_disallowed(dev, action, device, query->target, + pcmk_is_set(query->call_options, st_opt_allow_suicide)); + add_action_reply(dev, "off", device, query->target, + pcmk_is_set(query->call_options, st_opt_allow_suicide)); + add_action_reply(dev, "on", device, query->target, FALSE); + } + + /* A query without a target wants device parameters */ + if (query->target == NULL) { + xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); + + g_hash_table_foreach(device->params, hash2field, attrs); + } + } + + crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); + if (query->target) { + crm_debug("Found %d matching device%s for target '%s'", + available_devices, pcmk__plural_s(available_devices), + query->target); + } else { + crm_debug("%d device%s installed", + available_devices, pcmk__plural_s(available_devices)); + } + + if (list != NULL) { + crm_log_xml_trace(list, "Add query results"); + add_message_xml(query->reply, F_STONITH_CALLDATA, list); + } + + stonith_send_reply(query->reply, query->call_options, query->remote_peer, + client); + +done: + free_xml(query->reply); + free(query->remote_peer); + free(query->client_id); + free(query->target); + free(query->action); + free(query); + free_xml(list); + g_list_free_full(devices, free); +} + +/*! + * \internal + * \brief Log the result of an asynchronous command + * + * \param[in] cmd Command the result is for + * \param[in] result Result of command + * \param[in] pid Process ID of command, if available + * \param[in] next Alternate device that will be tried if command failed + * \param[in] op_merged Whether this command was merged with an earlier one + */ +static void +log_async_result(const async_command_t *cmd, + const pcmk__action_result_t *result, + int pid, const char *next, bool op_merged) +{ + int log_level = LOG_ERR; + int output_log_level = LOG_NEVER; + guint devices_remaining = g_list_length(cmd->next_device_iter); + + GString *msg = g_string_sized_new(80); // Reasonable starting size + + // Choose log levels appropriately if we have a result + if (pcmk__result_ok(result)) { + log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; + if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { + output_log_level = LOG_DEBUG; + } + next = NULL; + } else { + log_level = (cmd->target == NULL)? 
LOG_NOTICE : LOG_ERR; + if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { + output_log_level = LOG_WARNING; + } + } + + // Build the log message piece by piece + pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); + if (pid != 0) { + g_string_append_printf(msg, "[%d] ", pid); + } + if (cmd->target != NULL) { + pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); + } + if (cmd->device != NULL) { + pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); + } + + // Add exit status or execution status as appropriate + if (result->execution_status == PCMK_EXEC_DONE) { + g_string_append_printf(msg, "returned %d", result->exit_status); + } else { + pcmk__g_strcat(msg, "could not be executed: ", + pcmk_exec_status_str(result->execution_status), NULL); + } + + // Add exit reason and next device if appropriate + if (result->exit_reason != NULL) { + pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); + } + if (next != NULL) { + pcmk__g_strcat(msg, ", retrying with ", next, NULL); + } + if (devices_remaining > 0) { + g_string_append_printf(msg, " (%u device%s remaining)", + (unsigned int) devices_remaining, + pcmk__plural_s(devices_remaining)); + } + g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", + (op_merged? "merged " : ""), cmd->id, + cmd->client_name); + + // Log the result + do_crm_log(log_level, "%s", msg->str); + g_string_free(msg, TRUE); + + // Log the output (which may have multiple lines), if appropriate + if (output_log_level != LOG_NEVER) { + char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); + + crm_log_output(output_log_level, prefix, result->action_stdout); + free(prefix); + } +} + +/*! + * \internal + * \brief Reply to requester after asynchronous command completion + * + * \param[in] cmd Command that completed + * \param[in] result Result of command + * \param[in] pid Process ID of command, if available + * \param[in] merged If true, command was merged with another, not executed + */ +static void +send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, + int pid, bool merged) +{ + xmlNode *reply = NULL; + pcmk__client_t *client = NULL; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + + log_async_result(cmd, result, pid, NULL, merged); + + if (cmd->client != NULL) { + client = pcmk__find_client_by_id(cmd->client); + if ((client == NULL) && (cmd->origin == NULL)) { + crm_trace("Skipping reply to %s: no longer a client", cmd->client); + return; + } + } + + reply = construct_async_reply(cmd, result); + if (merged) { + pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true); + } + + if (!stand_alone && pcmk__is_fencing_action(cmd->action) + && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { + /* The target was also the originator, so broadcast the result on its + * behalf (since it will be unable to). 
+ */ + crm_trace("Broadcast '%s' result for %s (target was also originator)", + cmd->action, cmd->target); + crm_xml_add(reply, F_SUBTYPE, "broadcast"); + crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); + } else { + // Reply only to the originator + stonith_send_reply(reply, cmd->options, cmd->origin, client); + } + + crm_log_xml_trace(reply, "Reply"); + free_xml(reply); + + if (stand_alone) { + /* Do notification with a clean data object */ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); + + stonith__xe_set_result(notify_data, result); + crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target); + crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); + crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); + crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); + crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); + crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); + + fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data); + fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); + } +} + +static void +cancel_stonith_command(async_command_t * cmd) +{ + stonith_device_t *device = cmd_device(cmd); + + if (device) { + crm_trace("Cancel scheduled '%s' action using %s", + cmd->action, device->id); + device->pending_ops = g_list_remove(device->pending_ops, cmd); + } +} + +/*! + * \internal + * \brief Cancel and reply to any duplicates of a just-completed operation + * + * Check whether any fencing operations are scheduled to do the same thing as + * one that just succeeded. If so, rather than performing the same operation + * twice, return the result of this operation for all matching pending commands. + * + * \param[in,out] cmd Fencing operation that just succeeded + * \param[in] result Result of \p cmd + * \param[in] pid If nonzero, process ID of agent invocation (for logs) + * + * \note Duplicate merging will do the right thing for either type of remapped + * reboot. If the executing fencer remapped an unsupported reboot to off, + * then cmd->action will be "reboot" and will be merged with any other + * reboot requests. If the originating fencer remapped a topology reboot + * to off then on, we will get here once with cmd->action "off" and once + * with "on", and they will be merged separately with similar requests. + */ +static void +reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, + int pid) +{ + GList *next = NULL; + + for (GList *iter = cmd_list; iter != NULL; iter = next) { + async_command_t *cmd_other = iter->data; + + next = iter->next; // We might delete this entry, so grab next now + + if (cmd == cmd_other) { + continue; + } + + /* A pending operation matches if: + * 1. The client connections are different. + * 2. The target is the same. + * 3. The fencing action is the same. + * 4. The device scheduled to execute the action is the same. + */ + if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || + !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || + !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || + !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { + + continue; + } + + crm_notice("Merging fencing action '%s'%s%s originating from " + "client %s with identical fencing request from client %s", + cmd_other->action, + (cmd_other->target == NULL)? 
"" : " targeting ", + pcmk__s(cmd_other->target, ""), cmd_other->client_name, + cmd->client_name); + + // Stop tracking the duplicate, send its result, and cancel it + cmd_list = g_list_remove_link(cmd_list, iter); + send_async_reply(cmd_other, result, pid, true); + cancel_stonith_command(cmd_other); + + free_async_command(cmd_other); + g_list_free_1(iter); + } +} + +/*! + * \internal + * \brief Return the next required device (if any) for an operation + * + * \param[in,out] cmd Fencing operation that just succeeded + * + * \return Next device required for action if any, otherwise NULL + */ +static stonith_device_t * +next_required_device(async_command_t *cmd) +{ + for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { + stonith_device_t *next_device = g_hash_table_lookup(device_list, + iter->data); + + if (is_action_required(cmd->action, next_device)) { + /* This is only called for successful actions, so it's OK to skip + * non-required devices. + */ + cmd->next_device_iter = iter->next; + return next_device; + } + } + return NULL; +} + +static void +st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) +{ + async_command_t *cmd = user_data; + + stonith_device_t *device = NULL; + stonith_device_t *next_device = NULL; + + CRM_CHECK(cmd != NULL, return); + + device = cmd_device(cmd); + cmd->active_on = NULL; + + /* The device is ready to do something else now */ + if (device) { + if (!device->verified && pcmk__result_ok(result) && + (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { + + device->verified = TRUE; + } + + mainloop_set_trigger(device->work); + } + + if (pcmk__result_ok(result)) { + next_device = next_required_device(cmd); + + } else if ((cmd->next_device_iter != NULL) + && !is_action_required(cmd->action, device)) { + /* if this device didn't work out, see if there are any others we can try. + * if the failed device was 'required', we can't pick another device. 
*/ + next_device = g_hash_table_lookup(device_list, + cmd->next_device_iter->data); + cmd->next_device_iter = cmd->next_device_iter->next; + } + + if (next_device == NULL) { + send_async_reply(cmd, result, pid, false); + if (pcmk__result_ok(result)) { + reply_to_duplicates(cmd, result, pid); + } + free_async_command(cmd); + + } else { // This operation requires more fencing + log_async_result(cmd, result, pid, next_device->id, false); + schedule_stonith_command(cmd, next_device); + } +} + +static gint +sort_device_priority(gconstpointer a, gconstpointer b) +{ + const stonith_device_t *dev_a = a; + const stonith_device_t *dev_b = b; + + if (dev_a->priority > dev_b->priority) { + return -1; + } else if (dev_a->priority < dev_b->priority) { + return 1; + } + return 0; +} + +static void +stonith_fence_get_devices_cb(GList * devices, void *user_data) +{ + async_command_t *cmd = user_data; + stonith_device_t *device = NULL; + guint ndevices = g_list_length(devices); + + crm_info("Found %d matching device%s for target '%s'", + ndevices, pcmk__plural_s(ndevices), cmd->target); + + if (devices != NULL) { + /* Order based on priority */ + devices = g_list_sort(devices, sort_device_priority); + device = g_hash_table_lookup(device_list, devices->data); + } + + if (device == NULL) { // No device found + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "No device configured for target '%s'", + cmd->target); + send_async_reply(cmd, &result, 0, false); + pcmk__reset_result(&result); + free_async_command(cmd); + g_list_free_full(devices, free); + + } else { // Device found, schedule it for fencing + cmd->device_list = devices; + cmd->next_device_iter = devices->next; + schedule_stonith_command(cmd, device); + } +} + +/*! + * \internal + * \brief Execute a fence action via the local node + * + * \param[in] msg Fencing request + * \param[out] result Where to store result of fence action + */ +static void +fence_locally(xmlNode *msg, pcmk__action_result_t *result) +{ + const char *device_id = NULL; + stonith_device_t *device = NULL; + async_command_t *cmd = NULL; + xmlNode *dev = NULL; + + CRM_CHECK((msg != NULL) && (result != NULL), return); + + dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); + + cmd = create_async_command(msg); + if (cmd == NULL) { + crm_log_xml_warn(msg, "invalid"); + fenced_set_protocol_error(result); + return; + } + + device_id = crm_element_value(dev, F_STONITH_DEVICE); + if (device_id != NULL) { + device = g_hash_table_lookup(device_list, device_id); + if (device == NULL) { + crm_err("Requested device '%s' is not available", device_id); + pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "Requested device '%s' not found", device_id); + return; + } + schedule_stonith_command(cmd, device); + + } else { + const char *host = crm_element_value(dev, F_STONITH_TARGET); + + if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { + int nodeid = 0; + crm_node_t *node = NULL; + + pcmk__scan_min_int(host, &nodeid, 0); + node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); + if (node != NULL) { + host = node->uname; + } + } + + /* If we get to here, then self-fencing is implicitly allowed */ + get_capable_devices(host, cmd->action, cmd->default_timeout, + TRUE, cmd, stonith_fence_get_devices_cb, + fenced_support_flag(cmd->action)); + } + + pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); +} + +/*! 
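+ * (Aside: sort_device_priority() above follows the GCompareFunc convention,
+ * where a negative return sorts its first argument earlier, so returning -1
+ * for the higher priority produces a descending order. A self-contained GLib
+ * example of the same convention, with invented names and values:)
+ * \code
+ * #include <glib.h>
+ *
+ * static gint by_descending(gconstpointer a, gconstpointer b)
+ * {
+ *     int ia = GPOINTER_TO_INT(a);
+ *     int ib = GPOINTER_TO_INT(b);
+ *
+ *     return (ia > ib)? -1 : ((ia < ib)? 1 : 0);  // negative: a sorts first
+ * }
+ *
+ * int main(void)
+ * {
+ *     GList *l = NULL;
+ *
+ *     l = g_list_prepend(l, GINT_TO_POINTER(1));
+ *     l = g_list_prepend(l, GINT_TO_POINTER(5));
+ *     l = g_list_prepend(l, GINT_TO_POINTER(3));
+ *     l = g_list_sort(l, by_descending);
+ *     g_print("head: %d\n", GPOINTER_TO_INT(l->data));  // prints "head: 5"
+ *     g_list_free(l);
+ *     return 0;
+ * }
+ * \endcode
+ *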
+ * \internal + * \brief Build an XML reply for a fencing operation + * + * \param[in] request Request that reply is for + * \param[in] data If not NULL, add to reply as call data + * \param[in] result Full result of fencing operation + * + * \return Newly created XML reply + * \note The caller is responsible for freeing the result. + * \note This has some overlap with construct_async_reply(), but that copies + * values from an async_command_t, whereas this one copies them from the + * request. + */ +xmlNode * +fenced_construct_reply(const xmlNode *request, xmlNode *data, + const pcmk__action_result_t *result) +{ + xmlNode *reply = NULL; + + reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __func__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); + stonith__xe_set_result(reply, result); + + if (request == NULL) { + /* Most likely, this is the result of a stonith operation that was + * initiated before we came up. Unfortunately that means we lack enough + * information to provide clients with a full result. + * + * @TODO Maybe synchronize this information at start-up? + */ + crm_warn("Missing request information for client notifications for " + "operation with result '%s' (initiated before we came up?)", + pcmk_exec_status_str(result->execution_status)); + + } else { + const char *name = NULL; + const char *value = NULL; + + // Attributes to copy from request to reply + const char *names[] = { + F_STONITH_OPERATION, + F_STONITH_CALLID, + F_STONITH_CLIENTID, + F_STONITH_CLIENTNAME, + F_STONITH_REMOTE_OP_ID, + F_STONITH_CALLOPTS + }; + + for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { + name = names[lpc]; + value = crm_element_value(request, name); + crm_xml_add(reply, name, value); + } + if (data != NULL) { + add_message_xml(reply, F_STONITH_CALLDATA, data); + } + } + return reply; +} + +/*! + * \internal + * \brief Build an XML reply to an asynchronous fencing command + * + * \param[in] cmd Fencing command that reply is for + * \param[in] result Command result + */ +static xmlNode * +construct_async_reply(const async_command_t *cmd, + const pcmk__action_result_t *result) +{ + xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __func__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); + crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); + crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); + crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); + crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); + crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); + crm_xml_add(reply, F_STONITH_TARGET, cmd->target); + crm_xml_add(reply, F_STONITH_ACTION, cmd->op); + crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); + crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); + crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); + + stonith__xe_set_result(reply, result); + return reply; +} + +bool fencing_peer_active(crm_node_t *peer) +{ + if (peer == NULL) { + return FALSE; + } else if (peer->uname == NULL) { + return FALSE; + } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { + return TRUE; + } + return FALSE; +} + +void +set_fencing_completed(remote_fencing_op_t *op) +{ + struct timespec tv; + + qb_util_timespec_from_epoch_get(&tv); + op->completed = tv.tv_sec; + op->completed_nsec = tv.tv_nsec; +} + +/*! 
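+ * (Aside: the peer-cache scan below uses the standard GHashTableIter idiom,
+ * which is worth seeing in isolation. A generic sketch with invented names;
+ * only the plain GLib calls are real:)
+ * \code
+ * #include <glib.h>
+ *
+ * // Return the first value satisfying the predicate, or NULL if none does
+ * static gpointer demo_find_first(GHashTable *table,
+ *                                 gboolean (*pred)(gpointer value))
+ * {
+ *     GHashTableIter iter;
+ *     gpointer key = NULL;
+ *     gpointer value = NULL;
+ *
+ *     g_hash_table_iter_init(&iter, table);
+ *     while (g_hash_table_iter_next(&iter, &key, &value)) {
+ *         if (pred(value)) {
+ *             return value;
+ *         }
+ *     }
+ *     return NULL;
+ * }
+ * \endcode
+ *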
+ * \internal + * \brief Look for alternate node needed if local node shouldn't fence target + * + * \param[in] target Node that must be fenced + * + * \return Name of an alternate node that should fence \p target if any, + * or NULL otherwise + */ +static const char * +check_alternate_host(const char *target) +{ + if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { + GHashTableIter gIter; + crm_node_t *entry = NULL; + + g_hash_table_iter_init(&gIter, crm_peer_cache); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + if (fencing_peer_active(entry) + && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { + crm_notice("Forwarding self-fencing request to %s", + entry->uname); + return entry->uname; + } + } + crm_warn("Will handle own fencing because no peer can"); + } + return NULL; +} + +/*! + * \internal + * \brief Send a reply to a CPG peer or IPC client + * + * \param[in] reply XML reply to send + * \param[in] call_options Send synchronously if st_opt_sync_call is set + * \param[in] remote_peer If not NULL, name of peer node to send CPG reply + * \param[in,out] client If not NULL, client to send IPC reply + */ +static void +stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, + pcmk__client_t *client) +{ + CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), + return); + + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { + send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, + reply, FALSE); + } +} + +static void +remove_relay_op(xmlNode * request) +{ + xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); + const char *relay_op_id = NULL; + const char *op_id = NULL; + const char *client_name = NULL; + const char *target = NULL; + remote_fencing_op_t *relay_op = NULL; + + if (dev) { + target = crm_element_value(dev, F_STONITH_TARGET); + } + + relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); + op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); + client_name = crm_element_value(request, F_STONITH_CLIENTNAME); + + /* Delete RELAY operation. */ + if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { + relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); + + if (relay_op) { + GHashTableIter iter; + remote_fencing_op_t *list_op = NULL; + g_hash_table_iter_init(&iter, stonith_remote_op_list); + + /* If the operation to be deleted is registered as a duplicate, delete the registration. */ + while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { + GList *dup_iter = NULL; + if (list_op != relay_op) { + for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { + remote_fencing_op_t *other = dup_iter->data; + if (other == relay_op) { + other->duplicates = g_list_remove(other->duplicates, relay_op); + break; + } + } + } + } + crm_debug("Deleting relay op %s ('%s'%s%s for %s), " + "replaced by op %s ('%s'%s%s for %s)", + relay_op->id, relay_op->action, + (relay_op->target == NULL)? "" : " targeting ", + pcmk__s(relay_op->target, ""), + relay_op->client_name, op_id, relay_op->action, + (target == NULL)? "" : " targeting ", pcmk__s(target, ""), + client_name); + + g_hash_table_remove(stonith_remote_op_list, relay_op_id); + } + } +} + +/*! + * \internal + * \brief Check whether an API request was sent by a privileged user + * + * API commands related to fencing configuration may be done only by privileged + * IPC users (i.e. 
root or hacluster), because all other users should go through + * the CIB to have ACLs applied. If no client was given, this is a peer request, + * which is always allowed. + * + * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) + * \param[in] op Requested API operation (for logging only) + * + * \return true if sender is peer or privileged client, otherwise false + */ +static inline bool +is_privileged(const pcmk__client_t *c, const char *op) +{ + if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { + return true; + } else { + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + pcmk__s(op, ""), pcmk__client_name(c)); + return false; + } +} + +// CRM_OP_REGISTER +static xmlNode * +handle_register_request(pcmk__request_t *request) +{ + xmlNode *reply = create_xml_node(NULL, "reply"); + + CRM_ASSERT(request->ipc_client != NULL); + crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); + crm_xml_add(reply, F_STONITH_CLIENTID, request->ipc_client->id); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + pcmk__set_request_flags(request, pcmk__request_reuse_options); + return reply; +} + +// STONITH_OP_EXEC +static xmlNode * +handle_agent_request(pcmk__request_t *request) +{ + execute_agent_action(request->xml, &request->result); + if (request->result.execution_status == PCMK_EXEC_PENDING) { + return NULL; + } + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// STONITH_OP_TIMEOUT_UPDATE +static xmlNode * +handle_update_timeout_request(pcmk__request_t *request) +{ + const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID); + const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); + int op_timeout = 0; + + crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &op_timeout); + do_stonith_async_timeout_update(client_id, call_id, op_timeout); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +// STONITH_OP_QUERY +static xmlNode * +handle_query_request(pcmk__request_t *request) +{ + int timeout = 0; + xmlNode *dev = NULL; + const char *action = NULL; + const char *target = NULL; + const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); + struct st_query_data *query = NULL; + + if (request->peer != NULL) { + // Record it for the future notification + create_remote_stonith_op(client_id, request->xml, TRUE); + } + + /* Delete the DC node RELAY operation. 
*/ + remove_relay_op(request->xml); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER); + if (dev != NULL) { + const char *device = crm_element_value(dev, F_STONITH_DEVICE); + + if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { + return NULL; // No query or reply necessary + } + target = crm_element_value(dev, F_STONITH_TARGET); + action = crm_element_value(dev, F_STONITH_ACTION); + } + + crm_log_xml_trace(request->xml, "Query"); + + query = calloc(1, sizeof(struct st_query_data)); + CRM_ASSERT(query != NULL); + + query->reply = fenced_construct_reply(request->xml, NULL, &request->result); + pcmk__str_update(&query->remote_peer, request->peer); + pcmk__str_update(&query->client_id, client_id); + pcmk__str_update(&query->target, target); + pcmk__str_update(&query->action, action); + query->call_options = request->call_options; + + crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout); + get_capable_devices(target, action, timeout, + pcmk_is_set(query->call_options, st_opt_allow_suicide), + query, stonith_query_capable_device_cb, st_device_supports_none); + return NULL; +} + +// T_STONITH_NOTIFY +static xmlNode * +handle_notify_request(pcmk__request_t *request) +{ + const char *flag_name = NULL; + + CRM_ASSERT(request->ipc_client != NULL); + flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE); + if (flag_name != NULL) { + crm_debug("Enabling %s callbacks for client %s", + flag_name, pcmk__request_origin(request)); + pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); + } + + flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE); + if (flag_name != NULL) { + crm_debug("Disabling %s callbacks for client %s", + flag_name, pcmk__request_origin(request)); + pcmk__clear_client_flags(request->ipc_client, + get_stonith_flag(flag_name)); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + pcmk__set_request_flags(request, pcmk__request_reuse_options); + + return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK); +} + +// STONITH_OP_RELAY +static xmlNode * +handle_relay_request(pcmk__request_t *request) +{ + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, + LOG_TRACE); + + crm_notice("Received forwarded fencing request from " + "%s %s to fence (%s) peer %s", + pcmk__request_origin_type(request), + pcmk__request_origin(request), + crm_element_value(dev, F_STONITH_ACTION), + crm_element_value(dev, F_STONITH_TARGET)); + + if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { + fenced_set_protocol_error(&request->result); + return fenced_construct_reply(request->xml, NULL, &request->result); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); + return NULL; +} + +// STONITH_OP_FENCE +static xmlNode * +handle_fence_request(pcmk__request_t *request) +{ + if ((request->peer != NULL) || stand_alone) { + fence_locally(request->xml, &request->result); + + } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { + switch (fenced_handle_manual_confirmation(request->ipc_client, + request->xml)) { + case pcmk_rc_ok: + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, + NULL); + break; + case EINPROGRESS: + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); + break; + default: + fenced_set_protocol_error(&request->result); + break; + } + + } else { + const char *alternate_host = NULL; + 
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, + LOG_TRACE); + const char *target = crm_element_value(dev, F_STONITH_TARGET); + const char *action = crm_element_value(dev, F_STONITH_ACTION); + const char *device = crm_element_value(dev, F_STONITH_DEVICE); + + if (request->ipc_client != NULL) { + int tolerance = 0; + + crm_notice("Client %s wants to fence (%s) %s using %s", + pcmk__request_origin(request), action, + target, (device? device : "any device")); + crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); + if (stonith_check_fence_tolerance(tolerance, target, action)) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, + NULL); + return fenced_construct_reply(request->xml, NULL, + &request->result); + } + alternate_host = check_alternate_host(target); + + } else { + crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", + request->peer, action, target, + (device == NULL)? "(any)" : device); + } + + if (alternate_host != NULL) { + const char *client_id = NULL; + remote_fencing_op_t *op = NULL; + + if (request->ipc_client->id == 0) { + client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); + } else { + client_id = request->ipc_client->id; + } + + /* Create a duplicate fencing operation to relay with the client ID. + * When a query response is received, this operation should be + * deleted to avoid keeping the duplicate around. + */ + op = create_remote_stonith_op(client_id, request->xml, FALSE); + + crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY); + crm_xml_add(request->xml, F_STONITH_CLIENTID, + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); + send_cluster_message(crm_get_peer(0, alternate_host), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); + + } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, + FALSE) == NULL) { + fenced_set_protocol_error(&request->result); + + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); + } + } + + if (request->result.execution_status == PCMK_EXEC_PENDING) { + return NULL; + } + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// STONITH_OP_FENCE_HISTORY +static xmlNode * +handle_history_request(pcmk__request_t *request) +{ + xmlNode *reply = NULL; + xmlNode *data = NULL; + + stonith_fence_history(request->xml, &data, request->peer, + request->call_options); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { + /* When the local node broadcasts its history, it sets + * st_opt_discard_reply and doesn't need a reply. + */ + reply = fenced_construct_reply(request->xml, data, &request->result); + } + free_xml(data); + return reply; +} + +// STONITH_OP_DEVICE_ADD +static xmlNode * +handle_device_add_request(pcmk__request_t *request) +{ + const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, + LOG_ERR); + + if (is_privileged(request->ipc_client, op)) { + int rc = stonith_device_register(dev, FALSE); + + pcmk__set_result(&request->result, + ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), + stonith__legacy2status(rc), + ((rc == pcmk_ok)? 
NULL : pcmk_strerror(rc))); + } else { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, + "Unprivileged users must register device via CIB"); + } + fenced_send_device_notification(op, &request->result, + (dev == NULL)? NULL : ID(dev)); + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// STONITH_OP_DEVICE_DEL +static xmlNode * +handle_device_delete_request(pcmk__request_t *request) +{ + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, + LOG_ERR); + const char *device_id = crm_element_value(dev, XML_ATTR_ID); + const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); + + if (is_privileged(request->ipc_client, op)) { + stonith_device_remove(device_id, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, + "Unprivileged users must delete device via CIB"); + } + fenced_send_device_notification(op, &request->result, device_id); + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// STONITH_OP_LEVEL_ADD +static xmlNode * +handle_level_add_request(pcmk__request_t *request) +{ + char *desc = NULL; + const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); + + if (is_privileged(request->ipc_client, op)) { + fenced_register_level(request->xml, &desc, &request->result); + } else { + unpack_level_request(request->xml, NULL, NULL, NULL, &desc); + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, + "Unprivileged users must add level via CIB"); + } + fenced_send_level_notification(op, &request->result, desc); + free(desc); + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// STONITH_OP_LEVEL_DEL +static xmlNode * +handle_level_delete_request(pcmk__request_t *request) +{ + char *desc = NULL; + const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); + + if (is_privileged(request->ipc_client, op)) { + fenced_unregister_level(request->xml, &desc, &request->result); + } else { + unpack_level_request(request->xml, NULL, NULL, NULL, &desc); + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, + "Unprivileged users must delete level via CIB"); + } + fenced_send_level_notification(op, &request->result, desc); + free(desc); + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +// CRM_OP_RM_NODE_CACHE +static xmlNode * +handle_cache_request(pcmk__request_t *request) +{ + int node_id = 0; + const char *name = NULL; + + crm_element_value_int(request->xml, XML_ATTR_ID, &node_id); + name = crm_element_value(request->xml, XML_ATTR_UNAME); + reap_crm_member(node_id, name); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + crm_err("Unknown IPC request %s from %s %s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request)); + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown IPC request type '%s' (bug?)", request->op); + return fenced_construct_reply(request->xml, NULL, &request->result); +} + +static void +fenced_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { CRM_OP_REGISTER, handle_register_request }, + { STONITH_OP_EXEC, handle_agent_request }, + { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, + { STONITH_OP_QUERY, 
handle_query_request }, + { T_STONITH_NOTIFY, handle_notify_request }, + { STONITH_OP_RELAY, handle_relay_request }, + { STONITH_OP_FENCE, handle_fence_request }, + { STONITH_OP_FENCE_HISTORY, handle_history_request }, + { STONITH_OP_DEVICE_ADD, handle_device_add_request }, + { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, + { STONITH_OP_LEVEL_ADD, handle_level_add_request }, + { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, + { CRM_OP_RM_NODE_CACHE, handle_cache_request }, + { NULL, handle_unknown_request }, + }; + + fenced_handlers = pcmk__register_handlers(handlers); +} + +void +fenced_unregister_handlers(void) +{ + if (fenced_handlers != NULL) { + g_hash_table_destroy(fenced_handlers); + fenced_handlers = NULL; + } +} + +static void +handle_request(pcmk__request_t *request) +{ + xmlNode *reply = NULL; + const char *reason = NULL; + + if (fenced_handlers == NULL) { + fenced_register_handlers(); + } + reply = pcmk__process_request(request, fenced_handlers); + if (reply != NULL) { + if (pcmk_is_set(request->flags, pcmk__request_reuse_options) + && (request->ipc_client != NULL)) { + /* Certain IPC-only commands must reuse the call options from the + * original request rather than the ones set by stonith_send_reply() + * -> do_local_reply(). + */ + pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, + request->ipc_flags); + request->ipc_client->request_id = 0; + } else { + stonith_send_reply(reply, request->call_options, + request->peer, request->ipc_client); + } + free_xml(reply); + } + + reason = request->result.exit_reason; + crm_debug("Processed %s request from %s %s: %s%s%s%s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request), + pcmk_exec_status_str(request->result.execution_status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? "" : ")"); +} + +static void +handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) +{ + // Copy, because request might be freed before we want to log this + char *op = crm_element_value_copy(request, F_STONITH_OPERATION); + + if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + process_remote_stonith_query(request); + } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { + fenced_process_fencing_reply(request); + } else { + crm_err("Ignoring unknown %s reply from %s %s", + pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), + ((client == NULL)? remote_peer : pcmk__client_name(client))); + crm_log_xml_warn(request, "UnknownOp"); + free(op); + return; + } + crm_debug("Processed %s reply from %s %s", + op, ((client == NULL)? "peer" : "client"), + ((client == NULL)? remote_peer : pcmk__client_name(client))); + free(op); +} + +/*! 
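+ * (Aside: the request routing above is a classic dispatch table, with a NULL
+ * key registered as the catch-all. Stripped to its skeleton in plain C, with
+ * invented names:)
+ * \code
+ * #include <string.h>
+ *
+ * typedef const char *(*demo_handler_t)(void);
+ *
+ * static const char *demo_register(void) { return "registered"; }
+ * static const char *demo_unknown(void)  { return "unknown op"; }
+ *
+ * static const struct { const char *op; demo_handler_t fn; } demo_handlers[] = {
+ *     { "register", demo_register },
+ *     { NULL,       demo_unknown  },  // NULL key acts as the fallback
+ * };
+ *
+ * static const char *demo_dispatch(const char *op)
+ * {
+ *     for (int i = 0; demo_handlers[i].op != NULL; i++) {
+ *         if (strcmp(op, demo_handlers[i].op) == 0) {
+ *             return demo_handlers[i].fn();
+ *         }
+ *     }
+ *     return demo_unknown();
+ * }
+ * \endcode
+ *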
+ * \internal
+ * \brief Handle a message from an IPC client or CPG peer
+ *
+ * \param[in,out] client      If not NULL, IPC client that sent message
+ * \param[in]     id          If from IPC client, IPC message ID
+ * \param[in]     flags       Message flags
+ * \param[in,out] message     Message XML
+ * \param[in]     remote_peer If not NULL, CPG peer that sent message
+ */
+void
+stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                xmlNode *message, const char *remote_peer)
+{
+    int call_options = st_opt_none;
+    bool is_reply = false;
+
+    CRM_CHECK(message != NULL, return);
+
+    if (get_xpath_object("//" T_STONITH_REPLY, message, LOG_NEVER) != NULL) {
+        is_reply = true;
+    }
+    crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
+    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
+              pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
+              crm_element_value(message, F_STONITH_OPERATION),
+              (is_reply? "reply" : "request"), id,
+              ((client == NULL)? "peer" : "client"),
+              ((client == NULL)? remote_peer : pcmk__client_name(client)));
+
+    if (pcmk_is_set(call_options, st_opt_sync_call)) {
+        CRM_ASSERT(client == NULL || client->request_id == id);
+    }
+
+    if (is_reply) {
+        handle_reply(client, message, remote_peer);
+    } else {
+        pcmk__request_t request = {
+            .ipc_client = client,
+            .ipc_id = id,
+            .ipc_flags = flags,
+            .peer = remote_peer,
+            .xml = message,
+            .call_options = call_options,
+            .result = PCMK__UNKNOWN_RESULT,
+        };
+
+        request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION);
+        CRM_CHECK(request.op != NULL, return);
+
+        if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
+            pcmk__set_request_flags(&request, pcmk__request_sync);
+        }
+
+        handle_request(&request);
+        pcmk__reset_request(&request);
+    }
+}
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
new file mode 100644
index 0000000..a766477
--- /dev/null
+++ b/daemons/fenced/fenced_history.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright 2009-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <pacemaker-fenced.h>
+
+#define MAX_STONITH_HISTORY 500
+
+/*!
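+ * (Aside: call options such as st_opt_sync_call and st_opt_cleanup are bit
+ * flags combined with OR and tested with pcmk_is_set() or a plain mask. The
+ * underlying idiom, with invented flag names:)
+ * \code
+ * #include <stdbool.h>
+ * #include <stdint.h>
+ *
+ * enum demo_opts {
+ *     demo_opt_none      = 0,
+ *     demo_opt_sync_call = (1 << 0),
+ *     demo_opt_cleanup   = (1 << 1),
+ * };
+ *
+ * static bool demo_is_set(uint32_t word, uint32_t flag)
+ * {
+ *     return (word & flag) == flag;  // true only if all bits of flag are set
+ * }
+ *
+ * // demo_is_set(demo_opt_sync_call | demo_opt_cleanup, demo_opt_cleanup)
+ * // evaluates to true
+ * \endcode
+ *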
+ * \internal
+ * \brief Send a broadcast to all nodes to trigger cleanup or
+ *        history synchronisation
+ *
+ * \param[in] history  Optional history to be attached
+ * \param[in] callopts Call options (cleanup is requested via a flag here)
+ * \param[in] target   Cleanup can be limited to certain fence-targets
+ */
+static void
+stonith_send_broadcast_history(xmlNode *history,
+                               int callopts,
+                               const char *target)
+{
+    xmlNode *bcast = create_xml_node(NULL, "stonith_command");
+    xmlNode *data = create_xml_node(NULL, __func__);
+
+    if (target) {
+        crm_xml_add(data, F_STONITH_TARGET, target);
+    }
+    crm_xml_add(bcast, F_TYPE, T_STONITH_NG);
+    crm_xml_add(bcast, F_SUBTYPE, "broadcast");
+    crm_xml_add(bcast, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
+    crm_xml_add_int(bcast, F_STONITH_CALLOPTS, callopts);
+    if (history) {
+        add_node_copy(data, history);
+    }
+    add_message_xml(bcast, F_STONITH_CALLDATA, data);
+    send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
+
+    free_xml(data);
+    free_xml(bcast);
+}
+
+static gboolean
+stonith_remove_history_entry(gpointer key,
+                             gpointer value,
+                             gpointer user_data)
+{
+    remote_fencing_op_t *op = value;
+    const char *target = (const char *) user_data;
+
+    if ((op->state == st_failed) || (op->state == st_done)) {
+        if ((target) && (strcmp(op->target, target) != 0)) {
+            return FALSE;
+        }
+        return TRUE;
+    }
+
+    return FALSE; /* don't clean pending operations */
+}
+
+/*!
+ * \internal
+ * \brief Send out a cleanup broadcast or do a local history-cleanup
+ *
+ * \param[in] target    Cleanup can be limited to certain fence-targets
+ * \param[in] broadcast Send out a cleanup broadcast
+ */
+static void
+stonith_fence_history_cleanup(const char *target,
+                              gboolean broadcast)
+{
+    if (broadcast) {
+        stonith_send_broadcast_history(NULL,
+                                       st_opt_cleanup | st_opt_discard_reply,
+                                       target);
+        /* we'll do the local clean-up when we receive our own broadcast back */
+    } else if (stonith_remote_op_list) {
+        g_hash_table_foreach_remove(stonith_remote_op_list,
+                                    stonith_remove_history_entry,
+                                    (gpointer) target);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+    }
+}
+
+/* keeping the length of fence-history within bounds
+ * =================================================
+ *
+ * If things run wild, fencing attempts can fill up the hash table,
+ * eventually consuming a lot of memory and producing huge history-sync
+ * messages. Before the history was synced across nodes, a reboot of a
+ * cluster node at least kept the history within bounds, though not
+ * reliably.
+ *
+ * stonith_remote_op_list isn't sorted by time stamp, so it would be
+ * fairly expensive to delete, say, the oldest entry whenever the list
+ * grows past MAX_STONITH_HISTORY entries. It is more efficient to purge
+ * MAX_STONITH_HISTORY/2 entries whenever the list grows beyond
+ * MAX_STONITH_HISTORY (sort by age, then purge the MAX_STONITH_HISTORY/2
+ * oldest). Done independently on each node, that might raise the
+ * probability of large syncs occurring; one could imagine a broadcast
+ * that purges MAX_STONITH_HISTORY/2 entries, or not syncing above a
+ * certain threshold. The simplest approach, though, is to purge the full
+ * history throughout the cluster once MAX_STONITH_HISTORY is reached,
+ * at the cost of sometimes purging history that would still be handy to
+ * have.
+ */
+
+
+static int
+op_time_sort(const void *a_voidp, const void *b_voidp)
+{
+    const remote_fencing_op_t **a = (const remote_fencing_op_t **) a_voidp;
+    const remote_fencing_op_t **b = (const remote_fencing_op_t **) b_voidp;
+    gboolean a_pending = ((*a)->state != st_failed) && ((*a)->state != st_done);
+    gboolean b_pending = ((*b)->state != st_failed) && ((*b)->state != st_done);
+
+    if (a_pending && b_pending) {
+        return 0;
+    } else if (a_pending) {
+        return -1;
+    } else if (b_pending) {
+        return 1;
+    } else if ((*b)->completed == (*a)->completed) {
+        if ((*b)->completed_nsec > (*a)->completed_nsec) {
+            return 1;
+        } else if ((*b)->completed_nsec == (*a)->completed_nsec) {
+            return 0;
+        }
+    } else if ((*b)->completed > (*a)->completed) {
+        return 1;
+    }
+
+    return -1;
+}
+
+
+/*!
+ * \internal
+ * \brief Trim the local history to MAX_STONITH_HISTORY / 2 entries
+ *        once it grows past MAX_STONITH_HISTORY
+ */
+void
+stonith_fence_history_trim(void)
+{
+    guint num_ops;
+
+    if (!stonith_remote_op_list) {
+        return;
+    }
+    num_ops = g_hash_table_size(stonith_remote_op_list);
+    if (num_ops > MAX_STONITH_HISTORY) {
+        remote_fencing_op_t *ops[num_ops];
+        remote_fencing_op_t *op = NULL;
+        GHashTableIter iter;
+        int i;
+
+        crm_trace("Fencing history grew beyond limit of %d, so purging "
+                  "half of the failed/successful attempts", MAX_STONITH_HISTORY);
+
+        /* write all ops into an array */
+        i = 0;
+        g_hash_table_iter_init(&iter, stonith_remote_op_list);
+        while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+            ops[i++] = op;
+        }
+        /* sort the array so that pending ops come first, followed by
+         * completed ops from most recent to oldest
+         */
+        qsort(ops, num_ops, sizeof(remote_fencing_op_t *), op_time_sort);
+        /* purge the oldest half of the history entries */
+        for (i = MAX_STONITH_HISTORY / 2; i < num_ops; i++) {
+            /* keep pending ops even if they shouldn't fill more than
+             * half of our buffer
+             */
+            if ((ops[i]->state == st_failed) || (ops[i]->state == st_done)) {
+                g_hash_table_remove(stonith_remote_op_list, ops[i]->id);
+            }
+        }
+        /* we've just purged valid data from the list, so there is no need
+         * to create a notification - if displayed, it can stay
+         */
+    }
+}
+
+/*!
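+ * (Aside: op_time_sort() above orders pending entries first and completed
+ * ones newest-first. The same comparator shape on a toy struct, standalone
+ * and with invented names:)
+ * \code
+ * #include <stdlib.h>
+ *
+ * struct demo_rec { int pending; long completed; };
+ *
+ * static int demo_cmp(const void *pa, const void *pb)
+ * {
+ *     const struct demo_rec *a = pa;
+ *     const struct demo_rec *b = pb;
+ *
+ *     if (a->pending != b->pending) {
+ *         return b->pending - a->pending;      // pending entries first
+ *     }
+ *     return (b->completed > a->completed)
+ *            - (b->completed < a->completed);  // then newest to oldest
+ * }
+ *
+ * // usage: qsort(recs, n, sizeof(recs[0]), demo_cmp);
+ * \endcode
+ *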
+ * \internal + * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list + * + * \param[in] history Fence-history in xml + * + * \return Fence-history as hash-table + */ +static GHashTable * +stonith_xml_history_to_list(const xmlNode *history) +{ + xmlNode *xml_op = NULL; + GHashTable *rv = NULL; + + init_stonith_remote_op_hash_table(&rv); + + CRM_LOG_ASSERT(rv != NULL); + + for (xml_op = pcmk__xml_first_child(history); xml_op != NULL; + xml_op = pcmk__xml_next(xml_op)) { + remote_fencing_op_t *op = NULL; + char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID); + int state; + int exit_status = CRM_EX_OK; + int execution_status = PCMK_EXEC_DONE; + long long completed; + long long completed_nsec = 0L; + + if (!id) { + crm_warn("Malformed fencing history received from peer"); + continue; + } + + crm_trace("Attaching op %s to hashtable", id); + + op = calloc(1, sizeof(remote_fencing_op_t)); + + op->id = id; + op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET); + op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION); + op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN); + op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE); + op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME); + crm_element_value_ll(xml_op, F_STONITH_DATE, &completed); + op->completed = (time_t) completed; + crm_element_value_ll(xml_op, F_STONITH_DATE_NSEC, &completed_nsec); + op->completed_nsec = completed_nsec; + crm_element_value_int(xml_op, F_STONITH_STATE, &state); + op->state = (enum op_state) state; + + /* @COMPAT We can't use stonith__xe_get_result() here because + * fencers <2.1.3 didn't include results, leading it to assume an error + * status. Instead, set an unknown status in that case. + */ + if ((crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &exit_status) < 0) + || (crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, + &execution_status) < 0)) { + exit_status = CRM_EX_INDETERMINATE; + execution_status = PCMK_EXEC_UNKNOWN; + } + pcmk__set_result(&op->result, exit_status, execution_status, + crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON)); + pcmk__set_result_output(&op->result, + crm_element_value_copy(xml_op, F_STONITH_OUTPUT), + NULL); + + + g_hash_table_replace(rv, id, op); + CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL); + } + + return rv; +} + +/*! 
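+ * (Aside: the merge below transfers entries between hash tables by stealing
+ * rather than copying, so ownership moves without a free/alloc cycle. The
+ * core GLib move idiom, sketched with invented names:)
+ * \code
+ * #include <glib.h>
+ *
+ * // Move one entry from src to dst without destroying or copying it
+ * static void demo_move_entry(GHashTable *src, GHashTable *dst, gpointer key)
+ * {
+ *     gpointer value = g_hash_table_lookup(src, key);
+ *
+ *     if (value != NULL) {
+ *         g_hash_table_steal(src, key);           // src forgets the entry
+ *         g_hash_table_replace(dst, key, value);  // dst takes ownership
+ *     }
+ * }
+ * \endcode
+ *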
+ * \internal
+ * \brief Craft the XML difference between the local fence history and a
+ *        history received from a remote peer, and merge the remote history
+ *        into the local one
+ *
+ * \param[in,out] remote_history Fence-history as a hash table (may be NULL)
+ * \param[in]     add_id         Whether to include the operation ID in each
+ *                               entry (not needed when answering an API
+ *                               history request)
+ * \param[in]     target         Optionally limit to a certain fence target
+ *
+ * \return The fence-history as XML
+ */
+static xmlNode *
+stonith_local_history_diff_and_merge(GHashTable *remote_history,
+                                     gboolean add_id, const char *target)
+{
+    xmlNode *history = NULL;
+    GHashTableIter iter;
+    remote_fencing_op_t *op = NULL;
+    gboolean updated = FALSE;
+    int cnt = 0;
+
+    if (stonith_remote_op_list) {
+        char *id = NULL;
+
+        history = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
+
+        g_hash_table_iter_init(&iter, stonith_remote_op_list);
+        while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
+            xmlNode *entry = NULL;
+
+            if (remote_history) {
+                remote_fencing_op_t *remote_op =
+                    g_hash_table_lookup(remote_history, op->id);
+
+                if (remote_op) {
+                    if (stonith__op_state_pending(op->state)
+                        && !stonith__op_state_pending(remote_op->state)) {
+
+                        crm_debug("Updating outdated pending operation %.8s "
+                                  "(state=%s) according to the one (state=%s) from "
+                                  "remote peer history",
+                                  op->id, stonith_op_state_str(op->state),
+                                  stonith_op_state_str(remote_op->state));
+
+                        g_hash_table_steal(remote_history, op->id);
+                        op->id = remote_op->id;
+                        remote_op->id = id;
+                        g_hash_table_iter_replace(&iter, remote_op);
+
+                        updated = TRUE;
+                        continue; /* skip outdated entries */
+
+                    } else if (!stonith__op_state_pending(op->state)
+                               && stonith__op_state_pending(remote_op->state)) {
+
+                        crm_debug("Broadcasting operation %.8s (state=%s) to "
+                                  "update the outdated pending one "
+                                  "(state=%s) in remote peer history",
+                                  op->id, stonith_op_state_str(op->state),
+                                  stonith_op_state_str(remote_op->state));
+
+                        g_hash_table_remove(remote_history, op->id);
+
+                    } else {
+                        g_hash_table_remove(remote_history, op->id);
+                        continue; /* skip entries broadcast already */
+                    }
+                }
+            }
+
+            if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) {
+                continue;
+            }
+
+            cnt++;
+            crm_trace("Attaching op %s", op->id);
+            entry = create_xml_node(history, STONITH_OP_EXEC);
+            if (add_id) {
+                crm_xml_add(entry, F_STONITH_REMOTE_OP_ID, op->id);
+            }
+            crm_xml_add(entry, F_STONITH_TARGET, op->target);
+            crm_xml_add(entry, F_STONITH_ACTION, op->action);
+            crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
+            crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
+            crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
+            crm_xml_add_ll(entry, F_STONITH_DATE, op->completed);
+            crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec);
+            crm_xml_add_int(entry, F_STONITH_STATE, op->state);
+            stonith__xe_set_result(entry, &op->result);
+        }
+    }
+
+    if (remote_history) {
+        init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+
+        updated |= g_hash_table_size(remote_history);
+
+        g_hash_table_iter_init(&iter, remote_history);
+        while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+            if (stonith__op_state_pending(op->state) &&
+                pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+
+                crm_warn("Failing pending operation %.8s originated by us but "
+                         "known only from peer history", op->id);
+                op->state = st_failed;
+                set_fencing_completed(op);
+
+                /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
+                 * from setting a delegate
+                 */
+                pcmk__set_result(&op->result,
+                                 "Initiated by earlier fencer "
+                                 "process and presumed failed");
+                fenced_broadcast_op_result(op, false);
+            }
+
+            g_hash_table_iter_steal(&iter);
+            g_hash_table_replace(stonith_remote_op_list, op->id, op);
+            /* We could trim the history here, but if we bailed out after
+             * trimming, we might miss more recent entries among those still
+             * left in the list. If we don't bail out, trimming just once at
+             * the end is more efficient, and the memory overhead is minimal
+             * because we are only moving pointers from one hash table to
+             * another.
+             */
+        }
+
+        g_hash_table_destroy(remote_history); /* remove what is left */
+    }
+
+    if (updated) {
+        stonith_fence_history_trim();
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+    }
+
+    if (cnt == 0) {
+        free_xml(history);
+        return NULL;
+    } else {
+        return history;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Build XML from the local fence history
+ *
+ * \param[in] add_id  Whether to include each operation's ID (not needed
+ *                    when answering an API history request)
+ * \param[in] target  If not NULL, limit to this fence target
+ *
+ * \return The fence history as XML
+ */
+static xmlNode *
+stonith_local_history(gboolean add_id, const char *target)
+{
+    return stonith_local_history_diff_and_merge(NULL, add_id, target);
+}
+
+/*!
+ * \internal
+ * \brief Handle fence-history messages (from API or coming in as broadcasts)
+ *
+ * \param[in,out] msg          Request XML
+ * \param[out]    output       Where to set local history, if requested
+ * \param[in]     remote_peer  If broadcast, peer that sent it
+ * \param[in]     options      Call options from the request
+ */
+void
+stonith_fence_history(xmlNode *msg, xmlNode **output,
+                      const char *remote_peer, int options)
+{
+    const char *target = NULL;
+    xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER);
+    xmlNode *out_history = NULL;
+
+    if (dev) {
+        target = crm_element_value(dev, F_STONITH_TARGET);
+        if (target && (options & st_opt_cs_nodeid)) {
+            int nodeid;
+            crm_node_t *node;
+
+            pcmk__scan_min_int(target, &nodeid, 0);
+            node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+            if (node) {
+                target = node->uname;
+            }
+        }
+    }
+
+    if (options & st_opt_cleanup) {
+        crm_trace("Cleaning up operations on %s in %p", target,
+                  stonith_remote_op_list);
+
+        stonith_fence_history_cleanup(target,
+                                      crm_element_value(msg, F_STONITH_CALLID) != NULL);
+    } else if (options & st_opt_broadcast) {
+        /* There is currently no definite sign of when a history sync is
+         * done, so send a notification for anything that looks like a
+         * history sync.
+         */
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY_SYNCED, NULL, NULL);
+        if (crm_element_value(msg, F_STONITH_CALLID)) {
+            /* This is coming from the fencing API: craft a broadcast with
+             * this node's history, so that every node can merge it and
+             * broadcast whatever it has on top.
+             */
+            out_history = stonith_local_history(TRUE, NULL);
+            crm_trace("Broadcasting history to peers");
+            stonith_send_broadcast_history(out_history,
+                                           st_opt_broadcast | st_opt_discard_reply,
+                                           NULL);
+        } else if (remote_peer &&
+                   !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
+            xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST,
+                                                msg, LOG_NEVER);
+
+            /* This is either a broadcast created directly upon a fencing API
+             * request, or a diff sent in response to such a broadcast. In
+             * both cases it may or may not carry a history. If we received
+             * differential data, merge in what we've received and stop;
+             * otherwise, broadcast what we have on top of it, marked as
+             * differential, and merge in afterwards.
+             */
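+            /* Example, assuming nodes A, B, and C, with the query raised
+             * on A: A broadcasts its full history; B and C merge it and
+             * answer with whatever they had on top, marked as differential;
+             * the differential answers are merged in turn without triggering
+             * any further broadcasts, so the exchange terminates.
+             */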
+ if (!history || !pcmk__xe_attr_is_true(history, F_STONITH_DIFFERENTIAL)) { + GHashTable *received_history = NULL; + + if (history != NULL) { + received_history = stonith_xml_history_to_list(history); + } + out_history = + stonith_local_history_diff_and_merge(received_history, TRUE, NULL); + if (out_history) { + crm_trace("Broadcasting history-diff to peers"); + pcmk__xe_set_bool_attr(out_history, F_STONITH_DIFFERENTIAL, true); + stonith_send_broadcast_history(out_history, + st_opt_broadcast | st_opt_discard_reply, + NULL); + } else { + crm_trace("History-diff is empty - skip broadcast"); + } + } + } else { + crm_trace("Skipping history-query-broadcast (%s%s)" + " we sent ourselves", + remote_peer?"remote-peer=":"local-ipc", + remote_peer?remote_peer:""); + } + } else { + /* plain history request */ + crm_trace("Looking for operations on %s in %p", target, + stonith_remote_op_list); + *output = stonith_local_history(FALSE, target); + } + free_xml(out_history); +} diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c new file mode 100644 index 0000000..dc67947 --- /dev/null +++ b/daemons/fenced/fenced_remote.c @@ -0,0 +1,2509 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#define TIMEOUT_MULTIPLY_FACTOR 1.2 + +/* When one fencer queries its peers for devices able to handle a fencing + * request, each peer will reply with a list of such devices available to it. + * Each reply will be parsed into a peer_device_info_t, with each device's + * information kept in a device_properties_t. 
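+ *
+ * For example, a peer that can reach two power switches capable of fencing
+ * node1 would reply with two entries in its devices table, each carrying
+ * that device's timeouts, delays, and support flags as known to that peer.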
+ */ + +typedef struct device_properties_s { + /* Whether access to this device has been verified */ + gboolean verified; + + /* The remaining members are indexed by the operation's "phase" */ + + /* Whether this device has been executed in each phase */ + gboolean executed[st_phase_max]; + /* Whether this device is disallowed from executing in each phase */ + gboolean disallowed[st_phase_max]; + /* Action-specific timeout for each phase */ + int custom_action_timeout[st_phase_max]; + /* Action-specific maximum random delay for each phase */ + int delay_max[st_phase_max]; + /* Action-specific base delay for each phase */ + int delay_base[st_phase_max]; + /* Group of enum st_device_flags */ + uint32_t device_support_flags; +} device_properties_t; + +typedef struct { + /* Name of peer that sent this result */ + char *host; + /* Only try peers for non-topology based operations once */ + gboolean tried; + /* Number of entries in the devices table */ + int ndevices; + /* Devices available to this host that are capable of fencing the target */ + GHashTable *devices; +} peer_device_info_t; + +GHashTable *stonith_remote_op_list = NULL; + +extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, + int call_options); + +static void request_peer_fencing(remote_fencing_op_t *op, + peer_device_info_t *peer); +static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); +static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); +static int get_op_total_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *chosen_peer); + +static gint +sort_strings(gconstpointer a, gconstpointer b) +{ + return strcmp(a, b); +} + +static void +free_remote_query(gpointer data) +{ + if (data != NULL) { + peer_device_info_t *peer = data; + + g_hash_table_destroy(peer->devices); + free(peer->host); + free(peer); + } +} + +void +free_stonith_remote_op_list(void) +{ + if (stonith_remote_op_list != NULL) { + g_hash_table_destroy(stonith_remote_op_list); + stonith_remote_op_list = NULL; + } +} + +struct peer_count_data { + const remote_fencing_op_t *op; + gboolean verified_only; + uint32_t support_action_only; + int count; +}; + +/*! + * \internal + * \brief Increment a counter if a device has not been executed yet + * + * \param[in] key Device ID (ignored) + * \param[in] value Device properties + * \param[in,out] user_data Peer count data + */ +static void +count_peer_device(gpointer key, gpointer value, gpointer user_data) +{ + device_properties_t *props = (device_properties_t*)value; + struct peer_count_data *data = user_data; + + if (!props->executed[data->op->phase] + && (!data->verified_only || props->verified) + && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { + ++(data->count); + } +} + +/*! 
+ * \internal
+ * \brief Check the number of available devices in a peer's query results
+ *
+ * \param[in] op                   Operation that results are for
+ * \param[in] peer                 Peer to count
+ * \param[in] verified_only        Whether to count only verified devices
+ * \param[in] support_action_only  Whether to count only devices that support
+ *                                 the operation's action
+ *
+ * \return Number of devices available to peer that were not already executed
+ */
+static int
+count_peer_devices(const remote_fencing_op_t *op,
+                   const peer_device_info_t *peer, gboolean verified_only,
+                   uint32_t support_action_only)
+{
+    struct peer_count_data data;
+
+    data.op = op;
+    data.verified_only = verified_only;
+    data.support_action_only = support_action_only;
+    data.count = 0;
+    if (peer) {
+        g_hash_table_foreach(peer->devices, count_peer_device, &data);
+    }
+    return data.count;
+}
+
+/*!
+ * \internal
+ * \brief Search for a device in a query result
+ *
+ * \param[in] op                   Operation that result is for
+ * \param[in] peer                 Query result for a peer
+ * \param[in] device               Device ID to search for
+ * \param[in] support_action_only  If not st_device_supports_none, match only
+ *                                 devices that support this action
+ *
+ * \return Device properties if found, NULL otherwise
+ */
+static device_properties_t *
+find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
+                 const char *device, uint32_t support_action_only)
+{
+    device_properties_t *props = g_hash_table_lookup(peer->devices, device);
+
+    if (props && (support_action_only != st_device_supports_none)
+        && !pcmk_is_set(props->device_support_flags, support_action_only)) {
+        return NULL;
+    }
+    return (props && !props->executed[op->phase]
+           && !props->disallowed[op->phase])? props : NULL;
+}
+
+/*!
+ * \internal
+ * \brief Find a device in a peer's device list and mark it as executed
+ *
+ * \param[in]     op                     Operation that peer result is for
+ * \param[in,out] peer                   Peer with results to search
+ * \param[in]     device                 ID of device to mark as done
+ * \param[in]     verified_devices_only  Only consider verified devices
+ *
+ * \return TRUE if device was found and marked, FALSE otherwise
+ */
+static gboolean
+grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
+                 const char *device, gboolean verified_devices_only)
+{
+    device_properties_t *props = find_peer_device(op, peer, device,
+                                                  fenced_support_flag(op->action));
+
+    if ((props == NULL) || (verified_devices_only && !props->verified)) {
+        return FALSE;
+    }
+
+    crm_trace("Removing %s from %s (%d remaining)",
+              device, peer->host,
+              count_peer_devices(op, peer, FALSE, st_device_supports_none));
+    props->executed[op->phase] = TRUE;
+    return TRUE;
+}
+
+static void
+clear_remote_op_timers(remote_fencing_op_t * op)
+{
+    if (op->query_timer) {
+        g_source_remove(op->query_timer);
+        op->query_timer = 0;
+    }
+    if (op->op_timer_total) {
+        g_source_remove(op->op_timer_total);
+        op->op_timer_total = 0;
+    }
+    if (op->op_timer_one) {
+        g_source_remove(op->op_timer_one);
+        op->op_timer_one = 0;
+    }
+}
+
+static void
+free_remote_op(gpointer data)
+{
+    remote_fencing_op_t *op = data;
+
+    crm_log_xml_debug(op->request, "Destroying");
+
+    clear_remote_op_timers(op);
+
+    free(op->id);
+    free(op->action);
+    free(op->delegate);
+    free(op->target);
+    free(op->client_id);
+    free(op->client_name);
+    free(op->originator);
+
+    if (op->query_results) {
+        g_list_free_full(op->query_results, free_remote_query);
+    }
+    if (op->request) {
+        free_xml(op->request);
+        op->request = NULL;
+    }
+    if (op->devices_list) {
+        g_list_free_full(op->devices_list, free);
+        op->devices_list = NULL;
+    }
+    g_list_free_full(op->automatic_list, free);
+    g_list_free(op->duplicates);
+
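+    /* The duplicates list is freed shallowly on purpose: each duplicate is
+     * owned by stonith_remote_op_list. The result, by contrast, owns its
+     * exit reason and output strings, so release those before the op itself.
+     */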
pcmk__reset_result(&op->result); + free(op); +} + +void +init_stonith_remote_op_hash_table(GHashTable **table) +{ + if (*table == NULL) { + *table = pcmk__strkey_table(NULL, free_remote_op); + } +} + +/*! + * \internal + * \brief Return an operation's originally requested action (before any remap) + * + * \param[in] op Operation to check + * + * \return Operation's original action + */ +static const char * +op_requested_action(const remote_fencing_op_t *op) +{ + return ((op->phase > st_phase_requested)? "reboot" : op->action); +} + +/*! + * \internal + * \brief Remap a "reboot" operation to the "off" phase + * + * \param[in,out] op Operation to remap + */ +static void +op_phase_off(remote_fencing_op_t *op) +{ + crm_info("Remapping multiple-device reboot targeting %s to 'off' " + CRM_XS " id=%.8s", op->target, op->id); + op->phase = st_phase_off; + + /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the + * memory allocation at each phase. + */ + strcpy(op->action, "off"); +} + +/*! + * \internal + * \brief Advance a remapped reboot operation to the "on" phase + * + * \param[in,out] op Operation to remap + */ +static void +op_phase_on(remote_fencing_op_t *op) +{ + GList *iter = NULL; + + crm_info("Remapped 'off' targeting %s complete, " + "remapping to 'on' for %s " CRM_XS " id=%.8s", + op->target, op->client_name, op->id); + op->phase = st_phase_on; + strcpy(op->action, "on"); + + /* Skip devices with automatic unfencing, because the cluster will handle it + * when the node rejoins. + */ + for (iter = op->automatic_list; iter != NULL; iter = iter->next) { + GList *match = g_list_find_custom(op->devices_list, iter->data, + sort_strings); + + if (match) { + op->devices_list = g_list_remove(op->devices_list, match->data); + } + } + g_list_free_full(op->automatic_list, free); + op->automatic_list = NULL; + + /* Rewind device list pointer */ + op->devices = op->devices_list; +} + +/*! + * \internal + * \brief Reset a remapped reboot operation + * + * \param[in,out] op Operation to reset + */ +static void +undo_op_remap(remote_fencing_op_t *op) +{ + if (op->phase > 0) { + crm_info("Undoing remap of reboot targeting %s for %s " + CRM_XS " id=%.8s", op->target, op->client_name, op->id); + op->phase = st_phase_requested; + strcpy(op->action, "reboot"); + } +} + +/*! + * \internal + * \brief Create notification data XML for a fencing operation result + * + * \param[in] op Fencer operation that completed + * + * \return Newly created XML to add as notification data + * \note The caller is responsible for freeing the result. + */ +static xmlNode * +fencing_result2xml(const remote_fencing_op_t *op) +{ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); + + crm_xml_add_int(notify_data, "state", op->state); + crm_xml_add(notify_data, F_STONITH_TARGET, op->target); + crm_xml_add(notify_data, F_STONITH_ACTION, op->action); + crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); + crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); + crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); + crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); + + return notify_data; +} + +/*! 
+ * \internal
+ * \brief Broadcast a fence result notification to all CPG peers
+ *
+ * \param[in] op         Fencer operation that completed
+ * \param[in] op_merged  Whether this operation is a duplicate of another
+ */
+void
+fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
+{
+    static int count = 0;
+    xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+    xmlNode *notify_data = fencing_result2xml(op);
+
+    count++;
+    crm_trace("Broadcasting result to peers");
+    crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
+    crm_xml_add(bcast, F_SUBTYPE, "broadcast");
+    crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
+    crm_xml_add_int(bcast, "count", count);
+
+    if (op_merged) {
+        pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
+    }
+
+    stonith__xe_set_result(notify_data, &op->result);
+
+    add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
+    send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
+    free_xml(notify_data);
+    free_xml(bcast);
+
+    return;
+}
+
+/*!
+ * \internal
+ * \brief Reply to a local request originator and notify all subscribed clients
+ *
+ * \param[in,out] op    Fencer operation that completed
+ * \param[in,out] data  Top-level XML to add notification to
+ */
+static void
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
+{
+    xmlNode *notify_data = NULL;
+    xmlNode *reply = NULL;
+    pcmk__client_t *client = NULL;
+
+    if (op->notify_sent == TRUE) {
+        /* nothing to do */
+        return;
+    }
+
+    /* Do notification with a clean data object */
+    crm_xml_add_int(data, "state", op->state);
+    crm_xml_add(data, F_STONITH_TARGET, op->target);
+    crm_xml_add(data, F_STONITH_OPERATION, op->action);
+
+    reply = fenced_construct_reply(op->request, data, &op->result);
+    crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+
+    /* Send fencing reply to the local client that initiated fencing */
+    client = pcmk__find_client_by_id(op->client_id);
+    if (client == NULL) {
+        crm_trace("Skipping reply to %s: no longer a client", op->client_id);
+    } else {
+        do_local_reply(reply, client, op->call_options);
+    }
+
+    /* Broadcast to all local clients that the fencing operation happened */
+    notify_data = fencing_result2xml(op);
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
+    free_xml(notify_data);
+    fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+
+    /* Mark this operation as having had its notifications sent */
+    op->notify_sent = TRUE;
+    free_xml(reply);
+}
+
+/*!
+ * \internal + * \brief Finalize all duplicates of a given fencer operation + * + * \param[in,out] op Fencer operation that completed + * \param[in,out] data Top-level XML to add notification to + */ +static void +finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) +{ + for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { + remote_fencing_op_t *other = iter->data; + + if (other->state == st_duplicate) { + other->state = op->state; + crm_debug("Performing duplicate notification for %s@%s: %s " + CRM_XS " id=%.8s", + other->client_name, other->originator, + pcmk_exec_status_str(op->result.execution_status), + other->id); + pcmk__copy_result(&op->result, &other->result); + finalize_op(other, data, true); + + } else { + // Possible if (for example) it timed out already + crm_err("Skipping duplicate notification for %s@%s " + CRM_XS " state=%s id=%.8s", + other->client_name, other->originator, + stonith_op_state_str(other->state), other->id); + } + } +} + +static char * +delegate_from_xml(xmlNode *xml) +{ + xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER); + + if (match == NULL) { + return crm_element_value_copy(xml, F_ORIG); + } else { + return crm_element_value_copy(match, F_STONITH_DELEGATE); + } +} + +/*! + * \internal + * \brief Finalize a peer fencing operation + * + * Clean up after a fencing operation completes. This function has two code + * paths: the executioner uses it to broadcast the result to CPG peers, and then + * each peer (including the executioner) uses it to process that broadcast and + * notify its IPC clients of the result. + * + * \param[in,out] op Fencer operation that completed + * \param[in,out] data If not NULL, XML reply of last delegated operation + * \param[in] dup Whether this operation is a duplicate of another + * (in which case, do not broadcast the result) + * + * \note The operation result should be set before calling this function. + */ +static void +finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) +{ + int level = LOG_ERR; + const char *subt = NULL; + xmlNode *local_data = NULL; + gboolean op_merged = FALSE; + + CRM_CHECK((op != NULL), return); + + // This is a no-op if timers have already been cleared + clear_remote_op_timers(op); + + if (op->notify_sent) { + // Most likely, this is a timed-out action that eventually completed + crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " + "Result arrived too late " CRM_XS " id=%.8s", + op->action, (op->target? " targeting " : ""), + (op->target? op->target : ""), + (op->delegate? op->delegate : "unknown node"), + op->client_name, op->originator, + (op_merged? " (merged)" : ""), + op->id); + return; + } + + set_fencing_completed(op); + undo_op_remap(op); + + if (data == NULL) { + data = create_xml_node(NULL, "remote-op"); + local_data = data; + + } else if (op->delegate == NULL) { + switch (op->result.execution_status) { + case PCMK_EXEC_NO_FENCE_DEVICE: + break; + + case PCMK_EXEC_INVALID: + if (op->result.exit_status != CRM_EX_EXPIRED) { + op->delegate = delegate_from_xml(data); + } + break; + + default: + op->delegate = delegate_from_xml(data); + break; + } + } + + if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) { + op_merged = true; + } + + /* Tell everyone the operation is done, we will continue + * with doing the local notifications once we receive + * the broadcast back. 
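+     * (Every peer, including the broadcaster itself, processes that
+     * broadcast by calling finalize_op() again, which then takes the
+     * local-notification path below.)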
*/ + subt = crm_element_value(data, F_SUBTYPE); + if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) { + /* Defer notification until the bcast message arrives */ + fenced_broadcast_op_result(op, op_merged); + free_xml(local_data); + return; + } + + if (pcmk__result_ok(&op->result) || dup + || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + level = LOG_NOTICE; + } + do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " + CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), + (op->target? op->target : ""), + (op->delegate? op->delegate : "unknown node"), + op->client_name, op->originator, + (op_merged? " (merged)" : ""), + crm_exit_str(op->result.exit_status), + pcmk_exec_status_str(op->result.execution_status), + ((op->result.exit_reason == NULL)? "" : ": "), + ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), + op->id); + + handle_local_reply_and_notify(op, data); + + if (!dup) { + finalize_op_duplicates(op, data); + } + + /* Free non-essential parts of the record + * Keep the record around so we can query the history + */ + if (op->query_results) { + g_list_free_full(op->query_results, free_remote_query); + op->query_results = NULL; + } + if (op->request) { + free_xml(op->request); + op->request = NULL; + } + + free_xml(local_data); +} + +/*! + * \internal + * \brief Finalize a watchdog fencer op after the waiting time expires + * + * \param[in,out] userdata Fencer operation that completed + * + * \return G_SOURCE_REMOVE (which tells glib not to restart timer) + */ +static gboolean +remote_op_watchdog_done(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_one = 0; + + crm_notice("Self-fencing (%s) by %s for %s assumed complete " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + op->state = st_done; + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + finalize_op(op, NULL, false); + return G_SOURCE_REMOVE; +} + +static gboolean +remote_op_timeout_one(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_one = 0; + + crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS + " id=%.8s", op->action, op->target, op->client_name, op->id); + pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, + "Peer did not return fence result within timeout"); + + // The requested delay has been applied for the first device + if (op->delay > 0) { + op->delay = 0; + crm_trace("Try another device for '%s' action targeting %s " + "for client %s without delay " CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + } + + // Try another device, if appropriate + request_peer_fencing(op, NULL); + return G_SOURCE_REMOVE; +} + +/*! + * \internal + * \brief Finalize a remote fencer operation that timed out + * + * \param[in,out] op Fencer operation that timed out + * \param[in] reason Readable description of what step timed out + */ +static void +finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) +{ + crm_debug("Action '%s' targeting %s for client %s timed out " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + + if (op->phase == st_phase_on) { + /* A remapped reboot operation timed out in the "on" phase, but the + * "off" phase completed successfully, so quit trying any further + * devices, and return success. 
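+         * (The fencing itself, the "off" half, already succeeded, so the
+         * target really is down; reporting failure here would be misleading.)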
+ */ + op->state = st_done; + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + op->state = st_failed; + pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); + } + finalize_op(op, NULL, false); +} + +/*! + * \internal + * \brief Finalize a remote fencer operation that timed out + * + * \param[in,out] userdata Fencer operation that timed out + * + * \return G_SOURCE_REMOVE (which tells glib not to restart timer) + */ +static gboolean +remote_op_timeout(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_total = 0; + + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + } else { + finalize_timed_out_op(userdata, "Fencing did not complete within a " + "total timeout based on the " + "configured timeout and retries for " + "any devices attempted"); + } + return G_SOURCE_REMOVE; +} + +static gboolean +remote_op_query_timeout(gpointer data) +{ + remote_fencing_op_t *op = data; + + op->query_timer = 0; + + if (op->state == st_done) { + crm_debug("Operation %.8s targeting %s already completed", + op->id, op->target); + } else if (op->state == st_exec) { + crm_debug("Operation %.8s targeting %s already in progress", + op->id, op->target); + } else if (op->query_results) { + // Query succeeded, so attempt the actual fencing + crm_debug("Query %.8s targeting %s complete (state=%s)", + op->id, op->target, stonith_op_state_str(op->state)); + request_peer_fencing(op, NULL); + } else { + crm_debug("Query %.8s targeting %s timed out (state=%s)", + op->id, op->target, stonith_op_state_str(op->state)); + finalize_timed_out_op(op, "No capable peers replied to device query " + "within timeout"); + } + + return G_SOURCE_REMOVE; +} + +static gboolean +topology_is_empty(stonith_topology_t *tp) +{ + int i; + + if (tp == NULL) { + return TRUE; + } + + for (i = 0; i < ST_LEVEL_MAX; i++) { + if (tp->levels[i] != NULL) { + return FALSE; + } + } + return TRUE; +} + +/*! + * \internal + * \brief Add a device to an operation's automatic unfencing list + * + * \param[in,out] op Operation to modify + * \param[in] device Device ID to add + */ +static void +add_required_device(remote_fencing_op_t *op, const char *device) +{ + GList *match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (!match) { + op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); + } +} + +/*! + * \internal + * \brief Remove a device from the automatic unfencing list + * + * \param[in,out] op Operation to modify + * \param[in] device Device ID to remove + */ +static void +remove_required_device(remote_fencing_op_t *op, const char *device) +{ + GList *match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (match) { + op->automatic_list = g_list_remove(op->automatic_list, match->data); + } +} + +/* deep copy the device list */ +static void +set_op_device_list(remote_fencing_op_t * op, GList *devices) +{ + GList *lpc = NULL; + + if (op->devices_list) { + g_list_free_full(op->devices_list, free); + op->devices_list = NULL; + } + for (lpc = devices; lpc != NULL; lpc = lpc->next) { + op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); + } + op->devices = op->devices_list; +} + +/*! 
+ * \internal + * \brief Check whether a node matches a topology target + * + * \param[in] tp Topology table entry to check + * \param[in] node Name of node to check + * + * \return TRUE if node matches topology target + */ +static gboolean +topology_matches(const stonith_topology_t *tp, const char *node) +{ + regex_t r_patt; + + CRM_CHECK(node && tp && tp->target, return FALSE); + switch (tp->kind) { + case fenced_target_by_attribute: + /* This level targets by attribute, so tp->target is a NAME=VALUE pair + * of a permanent attribute applied to targeted nodes. The test below + * relies on the locally cached copy of the CIB, so if fencing needs to + * be done before the initial CIB is received or after a malformed CIB + * is received, then the topology will be unable to be used. + */ + if (node_has_attr(node, tp->target_attribute, tp->target_value)) { + crm_notice("Matched %s with %s by attribute", node, tp->target); + return TRUE; + } + break; + + case fenced_target_by_pattern: + /* This level targets node names matching a pattern, so tp->target + * (and tp->target_pattern) is a regular expression. + */ + if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { + crm_info("Bad regex '%s' for fencing level", tp->target); + } else { + int status = regexec(&r_patt, node, 0, NULL, 0); + + regfree(&r_patt); + if (status == 0) { + crm_notice("Matched %s with %s by name", node, tp->target); + return TRUE; + } + } + break; + + case fenced_target_by_name: + crm_trace("Testing %s against %s", node, tp->target); + return pcmk__str_eq(tp->target, node, pcmk__str_casei); + + default: + break; + } + crm_trace("No match for %s with %s", node, tp->target); + return FALSE; +} + +stonith_topology_t * +find_topology_for_host(const char *host) +{ + GHashTableIter tIter; + stonith_topology_t *tp = g_hash_table_lookup(topology, host); + + if(tp != NULL) { + crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); + return tp; + } + + g_hash_table_iter_init(&tIter, topology); + while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { + if (topology_matches(tp, host)) { + crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); + return tp; + } + } + + crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); + return NULL; +} + +/*! + * \internal + * \brief Set fencing operation's device list to target's next topology level + * + * \param[in,out] op Remote fencing operation to modify + * \param[in] empty_ok If true, an operation without a target (i.e. + * queries) or a target without a topology will get a + * pcmk_rc_ok return value instead of ENODEV + * + * \return Standard Pacemaker return value + */ +static int +advance_topology_level(remote_fencing_op_t *op, bool empty_ok) +{ + stonith_topology_t *tp = NULL; + + if (op->target) { + tp = find_topology_for_host(op->target); + } + if (topology_is_empty(tp)) { + return empty_ok? 
pcmk_rc_ok : ENODEV; + } + + CRM_ASSERT(tp->levels != NULL); + + stonith__set_call_options(op->call_options, op->id, st_opt_topology); + + /* This is a new level, so undo any remapping left over from previous */ + undo_op_remap(op); + + do { + op->level++; + + } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); + + if (op->level < ST_LEVEL_MAX) { + crm_trace("Attempting fencing level %d targeting %s (%d devices) " + "for client %s@%s (id=%.8s)", + op->level, op->target, g_list_length(tp->levels[op->level]), + op->client_name, op->originator, op->id); + set_op_device_list(op, tp->levels[op->level]); + + // The requested delay has been applied for the first fencing level + if (op->level > 1 && op->delay > 0) { + op->delay = 0; + } + + if ((g_list_next(op->devices_list) != NULL) + && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) { + /* A reboot has been requested for a topology level with multiple + * devices. Instead of rebooting the devices sequentially, we will + * turn them all off, then turn them all on again. (Think about + * switched power outlets for redundant power supplies.) + */ + op_phase_off(op); + } + return pcmk_rc_ok; + } + + crm_info("All %sfencing options targeting %s for client %s@%s failed " + CRM_XS " id=%.8s", + (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", + op->target, op->client_name, op->originator, op->id); + return ENODEV; +} + +/*! + * \internal + * \brief If fencing operation is a duplicate, merge it into the other one + * + * \param[in,out] op Fencing operation to check + */ +static void +merge_duplicates(remote_fencing_op_t *op) +{ + GHashTableIter iter; + remote_fencing_op_t *other = NULL; + + time_t now = time(NULL); + + g_hash_table_iter_init(&iter, stonith_remote_op_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { + const char *other_action = op_requested_action(other); + + if (!strcmp(op->id, other->id)) { + continue; // Don't compare against self + } + if (other->state > st_exec) { + crm_trace("%.8s not duplicate of %.8s: not in progress", + op->id, other->id); + continue; + } + if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", + op->id, other->id, op->target, other->target); + continue; + } + if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { + crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", + op->id, other->id, op->action, other_action); + continue; + } + if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: same client %s", + op->id, other->id, op->client_name); + continue; + } + if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: suicide for %s", + op->id, other->id, other->target); + continue; + } + if (!fencing_peer_active(crm_get_peer(0, other->originator))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, + other->originator, other->id); + crm_trace("%.8s not duplicate of %.8s: originator dead", + op->id, other->id); + other->state = st_failed; + continue; + } + if ((other->total_timeout > 0) + && (now > (other->total_timeout + other->created))) { + crm_trace("%.8s not duplicate of %.8s: old (%ld vs. 
%ld + %d)", + op->id, other->id, now, other->created, + other->total_timeout); + continue; + } + + /* There is another in-flight request to fence the same host + * Piggyback on that instead. If it fails, so do we. + */ + other->duplicates = g_list_append(other->duplicates, op); + if (other->total_timeout == 0) { + other->total_timeout = op->total_timeout = + TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); + crm_trace("Best guess as to timeout used for %.8s: %d", + other->id, other->total_timeout); + } + crm_notice("Merging fencing action '%s' targeting %s originating from " + "client %s with identical request from %s@%s " + CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds", + op->action, op->target, op->client_name, + other->client_name, other->originator, + op->id, other->id, other->total_timeout); + report_timeout_period(op, other->total_timeout); + op->state = st_duplicate; + } +} + +static uint32_t fencing_active_peers(void) +{ + uint32_t count = 0; + crm_node_t *entry; + GHashTableIter gIter; + + g_hash_table_iter_init(&gIter, crm_peer_cache); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + if(fencing_peer_active(entry)) { + count++; + } + } + return count; +} + +/*! + * \internal + * \brief Process a manual confirmation of a pending fence action + * + * \param[in] client IPC client that sent confirmation + * \param[in,out] msg Request XML with manual confirmation + * + * \return Standard Pacemaker return code + */ +int +fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) +{ + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); + + CRM_CHECK(dev != NULL, return EPROTO); + + crm_notice("Received manual confirmation that %s has been fenced", + pcmk__s(crm_element_value(dev, F_STONITH_TARGET), + "unknown target")); + op = initiate_remote_stonith_op(client, msg, TRUE); + if (op == NULL) { + return EPROTO; + } + op->state = st_done; + set_fencing_completed(op); + op->delegate = strdup("a human"); + + // For the fencer's purposes, the fencing operation is done + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + finalize_op(op, msg, false); + + /* For the requester's purposes, the operation is still pending. The + * actual result will be sent asynchronously via the operation's done_cb(). + */ + return EINPROGRESS; +} + +/*! + * \internal + * \brief Create a new remote stonith operation + * + * \param[in] client ID of local stonith client that initiated the operation + * \param[in] request The request from the client that started the operation + * \param[in] peer TRUE if this operation is owned by another stonith peer + * (an operation owned by one peer is stored on all peers, + * but only the owner executes it; all nodes get the results + * once the owner finishes execution) + */ +void * +create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) +{ + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER); + int call_options = 0; + const char *operation = NULL; + + init_stonith_remote_op_hash_table(&stonith_remote_op_list); + + /* If this operation is owned by another node, check to make + * sure we haven't already created this operation. 
 */
+    if (peer && dev) {
+        const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+
+        CRM_CHECK(op_id != NULL, return NULL);
+
+        op = g_hash_table_lookup(stonith_remote_op_list, op_id);
+        if (op) {
+            crm_debug("Reusing existing remote fencing op %.8s for %s",
+                      op_id, ((client == NULL)? "unknown client" : client));
+            return op;
+        }
+    }
+
+    op = calloc(1, sizeof(remote_fencing_op_t));
+    CRM_ASSERT(op != NULL);
+
+    crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
+    // Value -1 means disable any static/random fencing delays
+    crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
+
+    if (peer && dev) {
+        op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
+    } else {
+        op->id = crm_generate_uuid();
+    }
+
+    g_hash_table_replace(stonith_remote_op_list, op->id, op);
+
+    op->state = st_query;
+    op->replies_expected = fencing_active_peers();
+    op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
+    op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
+    op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
+    op->created = time(NULL);
+
+    if (op->originator == NULL) {
+        /* Local or relayed request */
+        op->originator = strdup(stonith_our_uname);
+    }
+
+    CRM_LOG_ASSERT(client != NULL);
+    if (client) {
+        op->client_id = strdup(client);
+    }
+
+    /* For a RELAY operation, use this fencer's own identity (name.pid) as
+     * the client name.
+     */
+    operation = crm_element_value(request, F_STONITH_OPERATION);
+
+    if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
+        op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
+                                            (unsigned long) getpid());
+    } else {
+        op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
+    }
+
+    op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
+    op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
+    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+    op->call_options = call_options;
+
+    crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
+
+    crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
+              "base timeout %d, %u %s expected)",
+              (peer && dev)? "Recorded" : "Generated", op->id, op->action,
+              op->target, op->client_name, op->base_timeout,
+              op->replies_expected,
+              pcmk__plural_alt(op->replies_expected, "reply", "replies"));
+
+    if (op->call_options & st_opt_cs_nodeid) {
+        int nodeid;
+        crm_node_t *node;
+
+        pcmk__scan_min_int(op->target, &nodeid, 0);
+        node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+
+        /* Ensure the conversion only happens once */
+        stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
+
+        if (node && node->uname) {
+            free(op->target);
+            op->target = strdup(node->uname);
+
+        } else {
+            crm_warn("Could not expand nodeid '%s' into a host name", op->target);
+        }
+    }
+
+    /* Check whether this is a duplicate of another in-flight operation */
+    merge_duplicates(op);
+
+    if (op->state != st_duplicate) {
+        /* Notify history readers */
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+    }
+
+    /* Trimming is safe as long as it doesn't touch pending operations */
+    stonith_fence_history_trim();
+
+    return op;
+}
+
+/*!
+ * \internal
+ * \brief Create a peer fencing operation from a request, and initiate it
+ *
+ * \param[in] client      IPC client that made request (NULL to get from request)
+ * \param[in] request     Request XML
+ * \param[in] manual_ack  Whether this is a manual action confirmation
+ *
+ * \return Newly created operation on success, otherwise NULL
+ */
+remote_fencing_op_t *
+initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
+                           gboolean manual_ack)
+{
+    int query_timeout = 0;
+    xmlNode *query = NULL;
+    const char *client_id = NULL;
+    remote_fencing_op_t *op = NULL;
+    const char *relay_op_id = NULL;
+    const char *operation = NULL;
+
+    if (client) {
+        client_id = client->id;
+    } else {
+        client_id = crm_element_value(request, F_STONITH_CLIENTID);
+    }
+
+    CRM_LOG_ASSERT(client_id != NULL);
+    op = create_remote_stonith_op(client_id, request, FALSE);
+    op->owner = TRUE;
+    if (manual_ack) {
+        return op;
+    }
+
+    CRM_CHECK(op->action, return NULL);
+
+    if (advance_topology_level(op, true) != pcmk_rc_ok) {
+        op->state = st_failed;
+    }
+
+    switch (op->state) {
+        case st_failed:
+            // advance_topology_level() exhausted levels
+            pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                             "All topology levels failed");
+            crm_warn("Could not request peer fencing (%s) targeting %s "
+                     CRM_XS " id=%.8s", op->action, op->target, op->id);
+            finalize_op(op, NULL, false);
+            return op;
+
+        case st_duplicate:
+            crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
+                     CRM_XS " id=%.8s", op->action, op->target, op->id);
+            return op;
+
+        default:
+            crm_notice("Requesting peer fencing (%s) targeting %s "
+                       CRM_XS " id=%.8s state=%s base_timeout=%d",
+                       op->action, op->target, op->id,
+                       stonith_op_state_str(op->state), op->base_timeout);
+    }
+
+    query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
+                              NULL, op->call_options);
+
+    crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
+    crm_xml_add(query, F_STONITH_TARGET, op->target);
+    crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
+    crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
+    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
+    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
+    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
+
+    /* For a RELAY operation, add the original operation's ID to the query,
+     * so that the original RELAY operation can be deleted.
+     */
+    operation = crm_element_value(request, F_STONITH_OPERATION);
+    if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
+        relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
+        if (relay_op_id) {
+            crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
+        }
+    }
+
+    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
+    free_xml(query);
+
+    query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
+    op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
+
+    return op;
+}
+
+enum find_best_peer_options {
+    /*! Skip checking the target peer for capable fencing devices */
+    FIND_PEER_SKIP_TARGET = 0x0001,
+    /*! Only check the target peer for capable fencing devices */
+    FIND_PEER_TARGET_ONLY = 0x0002,
+    /*! Skip peers and devices that are not verified */
+    FIND_PEER_VERIFIED_ONLY = 0x0004,
+};
+
+static peer_device_info_t *
+find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
+{
+    GList *iter = NULL;
+    gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ?
TRUE : FALSE; + + if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { + return NULL; + } + + for (iter = op->query_results; iter != NULL; iter = iter->next) { + peer_device_info_t *peer = iter->data; + + crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", + peer->host, op->target, peer->ndevices, + pcmk__plural_s(peer->ndevices), peer->tried, options); + if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { + + if (grab_peer_device(op, peer, device, verified_devices_only)) { + return peer; + } + + } else if (!peer->tried + && count_peer_devices(op, peer, verified_devices_only, + fenced_support_flag(op->action))) { + /* No topology: Use the current best peer */ + crm_trace("Simple fencing"); + return peer; + } + } + + return NULL; +} + +static peer_device_info_t * +stonith_choose_peer(remote_fencing_op_t * op) +{ + const char *device = NULL; + peer_device_info_t *peer = NULL; + uint32_t active = fencing_active_peers(); + + do { + if (op->devices) { + device = op->devices->data; + crm_trace("Checking for someone to fence (%s) %s using %s", + op->action, op->target, device); + } else { + crm_trace("Checking for someone to fence (%s) %s", + op->action, op->target); + } + + /* Best choice is a peer other than the target with verified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); + if (peer) { + crm_trace("Found verified peer %s for %s", peer->host, device?device:""); + return peer; + } + + if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { + crm_trace("Waiting before looking for unverified devices to fence %s", op->target); + return NULL; + } + + /* If no other peer has verified access, next best is unverified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); + if (peer) { + crm_trace("Found best unverified peer %s", peer->host); + return peer; + } + + /* If no other peer can do it, last option is self-fencing + * (which is never allowed for the "on" phase of a remapped reboot) + */ + if (op->phase != st_phase_on) { + peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); + if (peer) { + crm_trace("%s will fence itself", peer->host); + return peer; + } + } + + /* Try the next fencing level if there is one (unless we're in the "on" + * phase of a remapped "reboot", because we ignore errors in that case) + */ + } while ((op->phase != st_phase_on) + && pcmk_is_set(op->call_options, st_opt_topology) + && (advance_topology_level(op, false) == pcmk_rc_ok)); + + if ((stonith_watchdog_timeout_ms > 0) + && pcmk__is_fencing_action(op->action) + && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) + && node_does_watchdog_fencing(op->target)) { + crm_info("Couldn't contact watchdog-fencing target-node (%s)", + op->target); + /* check_watchdog_fencing_and_wait will log additional info */ + } else { + crm_notice("Couldn't find anyone to fence (%s) %s using %s", + op->action, op->target, (device? 
device : "any device")); + } + return NULL; +} + +static int +get_device_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *peer, const char *device, + bool with_delay) +{ + device_properties_t *props; + int delay = 0; + + if (!peer || !device) { + return op->base_timeout; + } + + props = g_hash_table_lookup(peer->devices, device); + if (!props) { + return op->base_timeout; + } + + // op->delay < 0 means disable any static/random fencing delays + if (with_delay && op->delay >= 0) { + // delay_base is eventually limited by delay_max + delay = (props->delay_max[op->phase] > 0 ? + props->delay_max[op->phase] : props->delay_base[op->phase]); + } + + return (props->custom_action_timeout[op->phase]? + props->custom_action_timeout[op->phase] : op->base_timeout) + + delay; +} + +struct timeout_data { + const remote_fencing_op_t *op; + const peer_device_info_t *peer; + int total_timeout; +}; + +/*! + * \internal + * \brief Add timeout to a total if device has not been executed yet + * + * \param[in] key GHashTable key (device ID) + * \param[in] value GHashTable value (device properties) + * \param[in,out] user_data Timeout data + */ +static void +add_device_timeout(gpointer key, gpointer value, gpointer user_data) +{ + const char *device_id = key; + device_properties_t *props = value; + struct timeout_data *timeout = user_data; + + if (!props->executed[timeout->op->phase] + && !props->disallowed[timeout->op->phase]) { + timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, + device_id, true); + } +} + +static int +get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) +{ + struct timeout_data timeout; + + timeout.op = op; + timeout.peer = peer; + timeout.total_timeout = 0; + + g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); + + return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); +} + +static int +get_op_total_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *chosen_peer) +{ + int total_timeout = 0; + stonith_topology_t *tp = find_topology_for_host(op->target); + + if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { + int i; + GList *device_list = NULL; + GList *iter = NULL; + GList *auto_list = NULL; + + if (pcmk__str_eq(op->action, "on", pcmk__str_none) + && (op->automatic_list != NULL)) { + auto_list = g_list_copy(op->automatic_list); + } + + /* Yep, this looks scary, nested loops all over the place. + * Here is what is going on. + * Loop1: Iterate through fencing levels. + * Loop2: If a fencing level has devices, loop through each device + * Loop3: For each device in a fencing level, see what peer owns it + * and what that peer has reported the timeout is for the device. 
+         */
+        for (i = 0; i < ST_LEVEL_MAX; i++) {
+            if (!tp->levels[i]) {
+                continue;
+            }
+            for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
+                /* For the watchdog device, add the timeout to the budget
+                 * whether or not we got a reply for it.
+                 */
+                if ((stonith_watchdog_timeout_ms > 0)
+                    && pcmk__is_fencing_action(op->action)
+                    && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
+                                    pcmk__str_none)
+                    && node_does_watchdog_fencing(op->target)) {
+                    total_timeout += stonith_watchdog_timeout_ms / 1000;
+                    continue;
+                }
+
+                for (iter = op->query_results; iter != NULL; iter = iter->next) {
+                    const peer_device_info_t *peer = iter->data;
+
+                    if (auto_list) {
+                        GList *match = g_list_find_custom(auto_list, device_list->data,
+                                                          sort_strings);
+                        if (match) {
+                            auto_list = g_list_remove(auto_list, match->data);
+                        }
+                    }
+
+                    if (find_peer_device(op, peer, device_list->data,
+                                         fenced_support_flag(op->action))) {
+                        total_timeout += get_device_timeout(op, peer,
+                                                            device_list->data,
+                                                            true);
+                        break;
+                    }
+                } /* End Loop3: match device with peer that owns device, find device's timeout period */
+            } /* End Loop2: iterate through devices at a specific level */
+        } /* End Loop1: iterate through fencing levels */
+
+        // Add timeouts for any automatic-unfencing devices that remain
+        if (auto_list) {
+            for (iter = auto_list; iter != NULL; iter = iter->next) {
+                GList *iter2 = NULL;
+
+                for (iter2 = op->query_results; iter2 != NULL; iter2 = iter2->next) {
+                    peer_device_info_t *peer = iter2->data;
+                    if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
+                        total_timeout += get_device_timeout(op, peer,
+                                                            iter->data, true);
+                        break;
+                    }
+                }
+            }
+        }
+
+        g_list_free(auto_list);
+
+    } else if (chosen_peer) {
+        total_timeout = get_peer_timeout(op, chosen_peer);
+    } else {
+        total_timeout = op->base_timeout;
+    }
+
+    /* Take any requested fencing delay into account to prevent it from eating
+     * up the total timeout.
+     */
+    return ((total_timeout ? total_timeout : op->base_timeout)
+            + (op->delay > 0 ? op->delay : 0));
+}
+
+static void
+report_timeout_period(remote_fencing_op_t * op, int op_timeout)
+{
+    GList *iter = NULL;
+    xmlNode *update = NULL;
+    const char *client_node = NULL;
+    const char *client_id = NULL;
+    const char *call_id = NULL;
+
+    if (op->call_options & st_opt_sync_call) {
+        /* There is no reason to report the timeout for a synchronous call. It
+         * is impossible to use the reported timeout to do anything when the client
+         * is blocking for the response. This update is only important for
+         * async calls that require a callback to report the results in.
*/ + return; + } else if (!op->request) { + return; + } + + crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); + client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); + call_id = crm_element_value(op->request, F_STONITH_CALLID); + client_id = crm_element_value(op->request, F_STONITH_CLIENTID); + if (!client_node || !call_id || !client_id) { + return; + } + + if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { + // Client is connected to this node, so send update directly to them + do_stonith_async_timeout_update(client_id, call_id, op_timeout); + return; + } + + /* The client is connected to another node, relay this update to them */ + update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); + crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(update, F_STONITH_CLIENTID, client_id); + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + + send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); + + free_xml(update); + + for (iter = op->duplicates; iter != NULL; iter = iter->next) { + remote_fencing_op_t *dup = iter->data; + + crm_trace("Reporting timeout for duplicate %.8s to client %s", + dup->id, dup->client_name); + report_timeout_period(iter->data, op_timeout); + } +} + +/*! + * \internal + * \brief Advance an operation to the next device in its topology + * + * \param[in,out] op Fencer operation to advance + * \param[in] device ID of device that just completed + * \param[in,out] msg If not NULL, XML reply of last delegated operation + */ +static void +advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + xmlNode *msg) +{ + /* Advance to the next device at this topology level, if any */ + if (op->devices) { + op->devices = op->devices->next; + } + + /* Handle automatic unfencing if an "on" action was requested */ + if ((op->phase == st_phase_requested) + && pcmk__str_eq(op->action, "on", pcmk__str_none)) { + /* If the device we just executed was required, it's not anymore */ + remove_required_device(op, device); + + /* If there are no more devices at this topology level, run through any + * remaining devices with automatic unfencing + */ + if (op->devices == NULL) { + op->devices = op->automatic_list; + } + } + + if ((op->devices == NULL) && (op->phase == st_phase_off)) { + /* We're done with this level and with required devices, but we had + * remapped "reboot" to "off", so start over with "on". If any devices + * need to be turned back on, op->devices will be non-NULL after this. 
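+         * (op_phase_on() also skips devices with automatic unfencing, since
+         * the cluster handles those when the target rejoins.)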
+ */ + op_phase_on(op); + } + + // This function is only called if the previous device succeeded + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + if (op->devices) { + /* Necessary devices remain, so execute the next one */ + crm_trace("Next targeting %s on behalf of %s@%s", + op->target, op->client_name, op->originator); + + // The requested delay has been applied for the first device + if (op->delay > 0) { + op->delay = 0; + } + + request_peer_fencing(op, NULL); + } else { + /* We're done with all devices and phases, so finalize operation */ + crm_trace("Marking complex fencing op targeting %s as complete", + op->target); + op->state = st_done; + finalize_op(op, msg, false); + } +} + +static gboolean +check_watchdog_fencing_and_wait(remote_fencing_op_t * op) +{ + if (node_does_watchdog_fencing(op->target)) { + + crm_notice("Waiting %lds for %s to self-fence (%s) for " + "client %s " CRM_XS " id=%.8s", + (stonith_watchdog_timeout_ms / 1000), + op->target, op->action, op->client_name, op->id); + + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); + } + op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, + remote_op_watchdog_done, op); + return TRUE; + } else { + crm_debug("Skipping fallback to watchdog-fencing as %s is " + "not in host-list", op->target); + } + return FALSE; +} + +/*! + * \internal + * \brief Ask a peer to execute a fencing operation + * + * \param[in,out] op Fencing operation to be executed + * \param[in,out] peer If NULL or topology is in use, choose best peer to + * execute the fencing, otherwise use this peer + */ +static void +request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) +{ + const char *device = NULL; + int timeout; + + CRM_CHECK(op != NULL, return); + + crm_trace("Action %.8s targeting %s for %s is %s", + op->id, op->target, op->client_name, + stonith_op_state_str(op->state)); + + if ((op->phase == st_phase_on) && (op->devices != NULL)) { + /* We are in the "on" phase of a remapped topology reboot. If this + * device has pcmk_reboot_action="off", or doesn't support the "on" + * action, skip it. + * + * We can't check device properties at this point because we haven't + * chosen a peer for this stage yet. Instead, we check the local node's + * knowledge about the device. If different versions of the fence agent + * are installed on different nodes, there's a chance this could be + * mistaken, but the worst that could happen is we don't try turning the + * node back on when we should. 
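+         * (Both checks below therefore consult only the local device table:
+         * the configured reboot action, and whether the agent supports "on".)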
+ */ + device = op->devices->data; + if (pcmk__str_eq(fenced_device_reboot_action(device), "off", + pcmk__str_none)) { + crm_info("Not turning %s back on using %s because the device is " + "configured to stay off (pcmk_reboot_action='off')", + op->target, device); + advance_topology_device_in_level(op, device, NULL); + return; + } + if (!fenced_device_supports_on(device)) { + crm_info("Not turning %s back on using %s because the agent " + "doesn't support 'on'", op->target, device); + advance_topology_device_in_level(op, device, NULL); + return; + } + } + + timeout = op->base_timeout; + if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { + peer = stonith_choose_peer(op); + } + + if (!op->op_timer_total) { + op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); + op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); + report_timeout_period(op, op->total_timeout); + crm_info("Total timeout set to %d for peer's fencing targeting %s for %s" + CRM_XS "id=%.8s", + op->total_timeout, op->target, op->client_name, op->id); + } + + if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { + /* Ignore the caller's peer preference if topology is in use, because + * that peer might not have access to the required device. With + * topology, stonith_choose_peer() removes the device from further + * consideration, so the timeout must be calculated beforehand. + * + * @TODO Basing the total timeout on the caller's preferred peer (above) + * is less than ideal. + */ + peer = stonith_choose_peer(op); + + device = op->devices->data; + /* Fencing timeout sent to peer takes no delay into account. + * The peer will add a dedicated timer for any delay upon + * schedule_stonith_command(). + */ + timeout = get_device_timeout(op, peer, device, false); + } + + if (peer) { + /* Take any requested fencing delay into account to prevent it from eating + * up the timeout. + */ + int timeout_one = (op->delay > 0 ? 
+                           TIMEOUT_MULTIPLY_FACTOR * op->delay : 0);
+        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
+
+        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
+        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
+        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
+        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
+        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
+        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
+        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
+        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
+        crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
+
+        if (device) {
+            timeout_one += TIMEOUT_MULTIPLY_FACTOR *
+                           get_device_timeout(op, peer, device, true);
+            crm_notice("Requesting that %s perform '%s' action targeting %s "
+                       "using %s " CRM_XS " for client %s (%ds)",
+                       peer->host, op->action, op->target, device,
+                       op->client_name, timeout_one);
+            crm_xml_add(remote_op, F_STONITH_DEVICE, device);
+
+        } else {
+            timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
+            crm_notice("Requesting that %s perform '%s' action targeting %s "
+                       CRM_XS " for client %s (%ds, %lds)",
+                       peer->host, op->action, op->target, op->client_name,
+                       timeout_one, stonith_watchdog_timeout_ms);
+        }
+
+        op->state = st_exec;
+        if (op->op_timer_one) {
+            g_source_remove(op->op_timer_one);
+            op->op_timer_one = 0;
+        }
+
+        if (!((stonith_watchdog_timeout_ms > 0)
+              && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+                  || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
+                      && pcmk__is_fencing_action(op->action)))
+              && check_watchdog_fencing_and_wait(op))) {
+
+            /* Some thoughts about self-fencing cases reaching this point:
+               - The check in check_watchdog_fencing_and_wait() shouldn't fail
+                 if STONITH_WATCHDOG_ID is chosen as the fencing device, since
+                 its presence implies watchdog-fencing is enabled anyway.
+               - If watchdog-fencing is disabled, either in general or for a
+                 specific target (detected in check_watchdog_fencing_and_wait()),
+                 we can't expect a success reply for some other kind of
+                 self-fencing, but a timeout is fine if the node doesn't come
+                 back in the meantime.
+               - The delicate case is watchdog-fencing enabled for a node while
+                 the watchdog fencing device isn't explicitly chosen for
+                 self-fencing. Local scheduler execution in sbd may detect the
+                 node as unclean and lead to timely self-fencing. Otherwise the
+                 selection of stonith-watchdog-timeout at least is questionable.
+             */
+
+            /* We're not waiting for a watchdog timeout here, so engage the
+             * timer with the timeout evaluated above.
+             */
+            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
+        }
+
+        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
+        peer->tried = TRUE;
+        free_xml(remote_op);
+        return;
+
+    } else if (op->phase == st_phase_on) {
+        /* A remapped "on" cannot be executed, but the node was already
+         * turned off successfully, so ignore the error and continue.
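+         * (fenced_process_fencing_reply() applies the same policy when a peer
+         * reports a failed remapped "on".)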
+ */ + crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " + "after successful 'off'", device, op->target); + advance_topology_device_in_level(op, device, NULL); + return; + + } else if (op->owner == FALSE) { + crm_err("Fencing (%s) targeting %s for client %s is not ours to control", + op->action, op->target, op->client_name); + + } else if (op->query_timer == 0) { + /* We've exhausted all available peers */ + crm_info("No remaining peers capable of fencing (%s) %s for client %s " + CRM_XS " state=%s", op->action, op->target, op->client_name, + stonith_op_state_str(op->state)); + CRM_CHECK(op->state < st_done, return); + finalize_timed_out_op(op, "All nodes failed, or are unable, to " + "fence target"); + + } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { + /* if the operation never left the query state, + * but we have all the expected replies, then no devices + * are available to execute the fencing operation. */ + + if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, + STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { + if (check_watchdog_fencing_and_wait(op)) { + return; + } + } + + if (op->state == st_query) { + crm_info("No peers (out of %d) have devices capable of fencing " + "(%s) %s for client %s " CRM_XS " state=%s", + op->replies, op->action, op->target, op->client_name, + stonith_op_state_str(op->state)); + + pcmk__reset_result(&op->result); + pcmk__set_result(&op->result, CRM_EX_ERROR, + PCMK_EXEC_NO_FENCE_DEVICE, NULL); + } else { + if (pcmk_is_set(op->call_options, st_opt_topology)) { + pcmk__reset_result(&op->result); + pcmk__set_result(&op->result, CRM_EX_ERROR, + PCMK_EXEC_NO_FENCE_DEVICE, NULL); + } + /* ... else use existing result from previous failed attempt + * (topology is not in use, and no devices remain to be attempted). + * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would + * prevent finalize_op() from setting the correct delegate if + * needed. + */ + + crm_info("No peers (out of %d) are capable of fencing (%s) %s " + "for client %s " CRM_XS " state=%s", + op->replies, op->action, op->target, op->client_name, + stonith_op_state_str(op->state)); + } + + op->state = st_failed; + finalize_op(op, NULL, false); + + } else { + crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " + "for client %s " CRM_XS " id=%.8s", + op->action, op->target, (device? " using " : ""), + (device? device : ""), op->client_name, op->id); + } +} + +/*! + * \internal + * \brief Comparison function for sorting query results + * + * \param[in] a GList item to compare + * \param[in] b GList item to compare + * + * \return Per the glib documentation, "a negative integer if the first value + * comes before the second, 0 if they are equal, or a positive integer + * if the first value comes after the second." + */ +static gint +sort_peers(gconstpointer a, gconstpointer b) +{ + const peer_device_info_t *peer_a = a; + const peer_device_info_t *peer_b = b; + + return (peer_b->ndevices - peer_a->ndevices); +} + +/*! 
+ * \internal + * \brief Determine if all the devices in the topology are found or not + * + * \param[in] op Fencing operation with topology to check + */ +static gboolean +all_topology_devices_found(const remote_fencing_op_t *op) +{ + GList *device = NULL; + GList *iter = NULL; + device_properties_t *match = NULL; + stonith_topology_t *tp = NULL; + gboolean skip_target = FALSE; + int i; + + tp = find_topology_for_host(op->target); + if (!tp) { + return FALSE; + } + if (pcmk__is_fencing_action(op->action)) { + /* Don't count the devices on the target node if we are killing + * the target node. */ + skip_target = TRUE; + } + + for (i = 0; i < ST_LEVEL_MAX; i++) { + for (device = tp->levels[i]; device; device = device->next) { + match = NULL; + for (iter = op->query_results; iter && !match; iter = iter->next) { + peer_device_info_t *peer = iter->data; + + if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + match = find_peer_device(op, peer, device->data, st_device_supports_none); + } + if (!match) { + return FALSE; + } + } + } + + return TRUE; +} + +/*! + * \internal + * \brief Parse action-specific device properties from XML + * + * \param[in] xml XML element containing the properties + * \param[in] peer Name of peer that sent XML (for logs) + * \param[in] device Device ID (for logs) + * \param[in] action Action the properties relate to (for logs) + * \param[in,out] op Fencing operation that properties are being parsed for + * \param[in] phase Phase the properties relate to + * \param[in,out] props Device properties to update + */ +static void +parse_action_specific(const xmlNode *xml, const char *peer, const char *device, + const char *action, remote_fencing_op_t *op, + enum st_remap_phase phase, device_properties_t *props) +{ + props->custom_action_timeout[phase] = 0; + crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, + &props->custom_action_timeout[phase]); + if (props->custom_action_timeout[phase]) { + crm_trace("Peer %s with device %s returned %s action timeout %d", + peer, device, action, props->custom_action_timeout[phase]); + } + + props->delay_max[phase] = 0; + crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); + if (props->delay_max[phase]) { + crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", + peer, device, props->delay_max[phase], action); + } + + props->delay_base[phase] = 0; + crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]); + if (props->delay_base[phase]) { + crm_trace("Peer %s with device %s returned base delay %d for %s", + peer, device, props->delay_base[phase], action); + } + + /* Handle devices with automatic unfencing */ + if (pcmk__str_eq(action, "on", pcmk__str_none)) { + int required = 0; + + crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); + if (required) { + crm_trace("Peer %s requires device %s to execute for action %s", + peer, device, action); + add_required_device(op, device); + } + } + + /* If a reboot is remapped to off+on, it's possible that a node is allowed + * to perform one action but not another. + */ + if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) { + props->disallowed[phase] = TRUE; + crm_trace("Peer %s is disallowed from executing %s for device %s", + peer, action, device); + } +} + +/*! 
+ * \internal + * \brief Parse one device's properties from peer's XML query reply + * + * \param[in] xml XML node containing device properties + * \param[in,out] op Operation that query and reply relate to + * \param[in,out] peer Peer's device information + * \param[in] device ID of device being parsed + */ +static void +add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, + peer_device_info_t *peer, const char *device) +{ + xmlNode *child; + int verified = 0; + device_properties_t *props = calloc(1, sizeof(device_properties_t)); + int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */ + + /* Add a new entry to this peer's devices list */ + CRM_ASSERT(props != NULL); + g_hash_table_insert(peer->devices, strdup(device), props); + + /* Peers with verified (monitored) access will be preferred */ + crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); + if (verified) { + crm_trace("Peer %s has confirmed a verified device %s", + peer->host, device); + props->verified = TRUE; + } + + crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags); + props->device_support_flags = flags; + + /* Parse action-specific device properties */ + parse_action_specific(xml, peer->host, device, op_requested_action(op), + op, st_phase_requested, props); + for (child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { + /* Replies for "reboot" operations will include the action-specific + * values for "off" and "on" in child elements, just in case the reboot + * winds up getting remapped. + */ + if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) { + parse_action_specific(child, peer->host, device, "off", + op, st_phase_off, props); + } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) { + parse_action_specific(child, peer->host, device, "on", + op, st_phase_on, props); + } + } +} + +/*! + * \internal + * \brief Parse a peer's XML query reply and add it to operation's results + * + * \param[in,out] op Operation that query and reply relate to + * \param[in] host Name of peer that sent this reply + * \param[in] ndevices Number of devices expected in reply + * \param[in] xml XML node containing device list + * + * \return Newly allocated result structure with parsed reply + */ +static peer_device_info_t * +add_result(remote_fencing_op_t *op, const char *host, int ndevices, + const xmlNode *xml) +{ + peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t)); + xmlNode *child; + + // cppcheck seems not to understand the abort logic in CRM_CHECK + // cppcheck-suppress memleak + CRM_CHECK(peer != NULL, return NULL); + peer->host = strdup(host); + peer->devices = pcmk__strkey_table(free, free); + + /* Each child element describes one capable device available to the peer */ + for (child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { + const char *device = ID(child); + + if (device) { + add_device_properties(child, op, peer, device); + } + } + + peer->ndevices = g_hash_table_size(peer->devices); + CRM_CHECK(ndevices == peer->ndevices, + crm_err("Query claimed to have %d device%s but %d found", + ndevices, pcmk__plural_s(ndevices), peer->ndevices)); + + op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); + return peer; +} + +/*! 
+ * \internal + * \brief Handle a peer's reply to our fencing query + * + * Parse a query result from XML and store it in the remote operation + * table, and when enough replies have been received, issue a fencing request. + * + * \param[in] msg XML reply received + * + * \return pcmk_ok on success, -errno on error + * + * \note See initiate_remote_stonith_op() for how the XML query was initially + * formed, and stonith_query() for how the peer formed its XML reply. + */ +int +process_remote_stonith_query(xmlNode *msg) +{ + int ndevices = 0; + gboolean host_is_target = FALSE; + gboolean have_all_replies = FALSE; + const char *id = NULL; + const char *host = NULL; + remote_fencing_op_t *op = NULL; + peer_device_info_t *peer = NULL; + uint32_t replies_expected; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + + CRM_CHECK(dev != NULL, return -EPROTO); + + id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); + CRM_CHECK(id != NULL, return -EPROTO); + + dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); + CRM_CHECK(dev != NULL, return -EPROTO); + crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); + + op = g_hash_table_lookup(stonith_remote_op_list, id); + if (op == NULL) { + crm_debug("Received query reply for unknown or expired operation %s", + id); + return -EOPNOTSUPP; + } + + replies_expected = fencing_active_peers(); + if (op->replies_expected < replies_expected) { + replies_expected = op->replies_expected; + } + if ((++op->replies >= replies_expected) && (op->state == st_query)) { + have_all_replies = TRUE; + } + host = crm_element_value(msg, F_ORIG); + host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); + + crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", + op->replies, replies_expected, host, + op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); + if (ndevices > 0) { + peer = add_result(op, host, ndevices, dev); + } + + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + if (pcmk_is_set(op->call_options, st_opt_topology)) { + /* If we start the fencing before all the topology results are in, + * it is possible fencing levels will be skipped because of the missing + * query results. */ + if (op->state == st_query && all_topology_devices_found(op)) { + /* All the query results are in for the topology, start the fencing ops. 
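+             * request_peer_fencing() will pick the executing peer for each
+             * topology level via stonith_choose_peer().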
*/ + crm_trace("All topology devices found"); + request_peer_fencing(op, peer); + + } else if (have_all_replies) { + crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", + replies_expected, op->replies); + request_peer_fencing(op, NULL); + } + + } else if (op->state == st_query) { + int nverified = count_peer_devices(op, peer, TRUE, + fenced_support_flag(op->action)); + + /* We have a result for a non-topology fencing op that looks promising, + * go ahead and start fencing before query timeout */ + if ((peer != NULL) && !host_is_target && nverified) { + /* we have a verified device living on a peer that is not the target */ + crm_trace("Found %d verified device%s", + nverified, pcmk__plural_s(nverified)); + request_peer_fencing(op, peer); + + } else if (have_all_replies) { + crm_info("All query replies have arrived, continuing (%d expected/%d received) ", + replies_expected, op->replies); + request_peer_fencing(op, NULL); + + } else { + crm_trace("Waiting for more peer results before launching fencing operation"); + } + + } else if ((peer != NULL) && (op->state == st_done)) { + crm_info("Discarding query result from %s (%d device%s): " + "Operation is %s", peer->host, + peer->ndevices, pcmk__plural_s(peer->ndevices), + stonith_op_state_str(op->state)); + } + + return pcmk_ok; +} + +/*! + * \internal + * \brief Handle a peer's reply to a fencing request + * + * Parse a fencing reply from XML, and either finalize the operation + * or attempt another device as appropriate. + * + * \param[in] msg XML reply received + */ +void +fenced_process_fencing_reply(xmlNode *msg) +{ + const char *id = NULL; + const char *device = NULL; + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + CRM_CHECK(dev != NULL, return); + + id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); + CRM_CHECK(id != NULL, return); + + dev = stonith__find_xe_with_result(msg); + CRM_CHECK(dev != NULL, return); + + stonith__xe_get_result(dev, &result); + + device = crm_element_value(dev, F_STONITH_DEVICE); + + if (stonith_remote_op_list) { + op = g_hash_table_lookup(stonith_remote_op_list, id); + } + + if ((op == NULL) && pcmk__result_ok(&result)) { + /* Record successful fencing operations */ + const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); + + op = create_remote_stonith_op(client_id, dev, TRUE); + } + + if (op == NULL) { + /* Could be for an event that began before we started */ + /* TODO: Record the op for later querying */ + crm_info("Received peer result of unknown or expired operation %s", id); + pcmk__reset_result(&result); + return; + } + + pcmk__reset_result(&op->result); + op->result = result; // The operation takes ownership of the result + + if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { + crm_err("Received outdated reply for device %s (instead of %s) to " + "fence (%s) %s. 
Operation already timed out at peer level.",
+                device, (const char *) op->devices->data, op->action, op->target);
+        return;
+    }
+
+    if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+        if (pcmk__result_ok(&op->result)) {
+            op->state = st_done;
+        } else {
+            op->state = st_failed;
+        }
+        finalize_op(op, msg, false);
+        return;
+
+    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+        /* If this isn't a remote level broadcast, and we are not the
+         * originator of the operation, we should not be receiving this message. */
+        crm_err("Received non-broadcast fencing result for operation %.8s "
+                "we do not own (device %s targeting %s)",
+                op->id, device, op->target);
+        return;
+    }
+
+    if (pcmk_is_set(op->call_options, st_opt_topology)) {
+        const char *device = NULL;
+        const char *reason = op->result.exit_reason;
+
+        /* We own the op, and it is complete. Broadcast the result to all
+         * nodes and notify our local clients. */
+        if (op->state == st_done) {
+            finalize_op(op, msg, false);
+            return;
+        }
+
+        device = crm_element_value(msg, F_STONITH_DEVICE);
+
+        if ((op->phase == st_phase_on) && !pcmk__result_ok(&op->result)) {
+            /* A remapped "on" failed, but the node was already turned off
+             * successfully, so ignore the error and continue.
+             */
+            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
+                     "after successful 'off'",
+                     device, pcmk_exec_status_str(op->result.execution_status),
+                     (reason == NULL)? "" : ": ",
+                     (reason == NULL)? "" : reason,
+                     op->target);
+            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+        } else {
+            crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
+                       "%s%s%s%s",
+                       op->action, op->target,
+                       ((device == NULL)? "" : " using "),
+                       ((device == NULL)? "" : device),
+                       op->client_name,
+                       op->originator,
+                       pcmk_exec_status_str(op->result.execution_status),
+                       (reason == NULL)? "" : " (",
+                       (reason == NULL)? "" : reason,
+                       (reason == NULL)? "" : ")");
+        }
+
+        if (pcmk__result_ok(&op->result)) {
+            /* An operation completed successfully. Try another device if
+             * necessary, otherwise mark the operation as done. */
+            advance_topology_device_in_level(op, device, msg);
+            return;
+        } else {
+            /* This device failed, time to try another topology level. If no
+             * other levels are available, mark this operation as failed and
+             * report results. */
+            if (advance_topology_level(op, false) != pcmk_rc_ok) {
+                op->state = st_failed;
+                finalize_op(op, msg, false);
+                return;
+            }
+        }
+
+    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
+        op->state = st_done;
+        finalize_op(op, msg, false);
+        return;
+
+    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
+               && (op->devices == NULL)) {
+        /* If the operation timed out, don't bother retrying other peers.
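+         * The PCMK_EXEC_TIMEOUT result is kept as-is so that finalize_op()
+         * reports the timeout to the requesting client.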
+         */
+        op->state = st_failed;
+        finalize_op(op, msg, false);
+        return;
+
+    } else {
+        /* fall through and attempt the fencing action using another peer */
+    }
+
+    /* Retry on failure */
+    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+              op->target, op->originator, op->client_name,
+              pcmk_exec_status_str(op->result.execution_status));
+    request_peer_fencing(op, NULL);
+}
+
+gboolean
+stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
+{
+    GHashTableIter iter;
+    time_t now = time(NULL);
+    remote_fencing_op_t *rop = NULL;
+
+    if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
+        action == NULL) {
+        return FALSE;
+    }
+
+    g_hash_table_iter_init(&iter, stonith_remote_op_list);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
+        if (strcmp(rop->target, target) != 0) {
+            continue;
+        } else if (rop->state != st_done) {
+            continue;
+            /* We don't have to worry about remapped reboots here
+             * because if state is done, any remapping has been undone
+             */
+        } else if (strcmp(rop->action, action) != 0) {
+            continue;
+        } else if ((rop->completed + tolerance) < now) {
+            continue;
+        }
+
+        crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
+                   target, action, tolerance, rop->delegate, rop->originator);
+        return TRUE;
+    }
+    return FALSE;
+}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
new file mode 100644
index 0000000..4edda6c
--- /dev/null
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -0,0 +1,1751 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>  // PRIu32, PRIx32
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/output_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <crm/common/mainloop.h>
+
+#include <crm/cib/internal.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include <pacemaker-fenced.h>
+
+#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
+
+char *stonith_our_uname = NULL;
+long stonith_watchdog_timeout_ms = 0;
+GList *stonith_watchdog_targets = NULL;
+
+static GMainLoop *mainloop = NULL;
+
+gboolean stand_alone = FALSE;
+static gboolean stonith_shutdown_flag = FALSE;
+
+static qb_ipcs_service_t *ipcs = NULL;
+static xmlNode *local_cib = NULL;
+static pe_working_set_t *fenced_data_set = NULL;
+static const unsigned long long data_set_flags = pe_flag_quick_location
+                                                 | pe_flag_no_compat
+                                                 | pe_flag_no_counts;
+
+static cib_t *cib_api = NULL;
+
+static pcmk__output_t *logger_out = NULL;
+static pcmk__output_t *out = NULL;
+
+pcmk__supported_format_t formats[] = {
+    PCMK__SUPPORTED_FORMAT_NONE,
+    PCMK__SUPPORTED_FORMAT_TEXT,
+    PCMK__SUPPORTED_FORMAT_XML,
+    { NULL, NULL, NULL }
+};
+
+static struct {
+    bool no_cib_connect;
+    gchar **log_files;
+} options;
+
+static crm_exit_t exit_code = CRM_EX_OK;
+
+static void stonith_shutdown(int nsig);
+static void stonith_cleanup(void);
+
+static int32_t
+st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+    if (stonith_shutdown_flag) {
+        crm_info("Ignoring new client [%d] during shutdown",
+                 pcmk__client_pid(c));
+        return -EPERM;
+    }
+
+    if (pcmk__new_client(c, uid, gid) == NULL) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/* Exit code means?
*/ +static int32_t +st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + int call_options = 0; + xmlNode *request = NULL; + pcmk__client_t *c = pcmk__find_client(qbc); + const char *op = NULL; + + if (c == NULL) { + crm_info("Invalid client: %p", qbc); + return 0; + } + + request = pcmk__client_data2xml(c, data, &id, &flags); + if (request == NULL) { + pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL); + return 0; + } + + + op = crm_element_value(request, F_CRM_TASK); + if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { + crm_xml_add(request, F_TYPE, T_STONITH_NG); + crm_xml_add(request, F_STONITH_OPERATION, op); + crm_xml_add(request, F_STONITH_CLIENTID, c->id); + crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); + crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); + + send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); + free_xml(request); + return 0; + } + + if (c->name == NULL) { + const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); + + if (value == NULL) { + value = "unknown"; + } + c->name = crm_strdup_printf("%s.%u", value, c->pid); + } + + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 + " from client %s", flags, call_options, id, pcmk__client_name(c)); + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(flags & crm_ipc_client_response); + CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ + c->request_id = id; /* Reply only to the last one */ + } + + crm_xml_add(request, F_STONITH_CLIENTID, c->id); + crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); + crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); + + crm_log_xml_trace(request, "ipc-received"); + stonith_command(c, id, flags, request, NULL); + + free_xml(request); + return 0; +} + +/* Error code means? 
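+ * (As the comment at st_ipc_closed()'s return statement below notes,
+ * returning 0 lets libqb go ahead and destroy the connection.)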
*/ +static int32_t +st_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + + crm_trace("Connection %p closed", c); + pcmk__free_client(client); + + /* 0 means: yes, go ahead and destroy the connection */ + return 0; +} + +static void +st_ipc_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p destroyed", c); + st_ipc_closed(c); +} + +static void +stonith_peer_callback(xmlNode * msg, void *private_data) +{ + const char *remote_peer = crm_element_value(msg, F_ORIG); + const char *op = crm_element_value(msg, F_STONITH_OPERATION); + + if (pcmk__str_eq(op, "poke", pcmk__str_none)) { + return; + } + + crm_log_xml_trace(msg, "Peer[inbound]"); + stonith_command(NULL, 0, 0, msg, remote_peer); +} + +#if SUPPORT_COROSYNC +static void +stonith_peer_ais_callback(cpg_handle_t handle, + const struct cpg_name *groupName, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + uint32_t kind = 0; + xmlNode *xml = NULL; + const char *from = NULL; + char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); + + if(data == NULL) { + return; + } + if (kind == crm_class_cluster) { + xml = string2xml(data); + if (xml == NULL) { + crm_err("Invalid XML: '%.120s'", data); + free(data); + return; + } + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ + stonith_peer_callback(xml, NULL); + } + + free_xml(xml); + free(data); + return; +} + +static void +stonith_peer_cs_destroy(gpointer user_data) +{ + crm_crit("Lost connection to cluster layer, shutting down"); + stonith_shutdown(0); +} +#endif + +void +do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options) +{ + /* send callback to originating child */ + int local_rc = pcmk_rc_ok; + int rid = 0; + uint32_t ipc_flags = crm_ipc_server_event; + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_LOG_ASSERT(client->request_id); + rid = client->request_id; + client->request_id = 0; + ipc_flags = crm_ipc_flags_none; + } + + local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); + if (local_rc == pcmk_rc_ok) { + crm_trace("Sent response %d to client %s", + rid, pcmk__client_name(client)); + } else { + crm_warn("%synchronous reply to client %s failed: %s", + (pcmk_is_set(call_options, st_opt_sync_call)? 
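+                 /* "S" or "As" completes the "%synchronous" format above as
+                  * "Synchronous" or "Asynchronous" */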
"S" : "As"), + pcmk__client_name(client), pcmk_rc_str(local_rc)); + } +} + +uint64_t +get_stonith_flag(const char *name) +{ + if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { + return st_callback_notify_fence; + + } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { + return st_callback_device_add; + + } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { + return st_callback_device_del; + + } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) { + return st_callback_notify_history; + + } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) { + return st_callback_notify_history_synced; + + } + return st_callback_unknown; +} + +static void +stonith_notify_client(gpointer key, gpointer value, gpointer user_data) +{ + + xmlNode *update_msg = user_data; + pcmk__client_t *client = value; + const char *type = NULL; + + CRM_CHECK(client != NULL, return); + CRM_CHECK(update_msg != NULL, return); + + type = crm_element_value(update_msg, F_SUBTYPE); + CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); + + if (client->ipcs == NULL) { + crm_trace("Skipping client with NULL channel"); + return; + } + + if (pcmk_is_set(client->flags, get_stonith_flag(type))) { + int rc = pcmk__ipc_send_xml(client, 0, update_msg, + crm_ipc_server_event); + + if (rc != pcmk_rc_ok) { + crm_warn("%s notification of client %s failed: %s " + CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), + pcmk_rc_str(rc), client->id, rc); + } else { + crm_trace("Sent %s notification to client %s", + type, pcmk__client_name(client)); + } + } +} + +void +do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) +{ + pcmk__client_t *client = NULL; + xmlNode *notify_data = NULL; + + if (!timeout || !call_id || !client_id) { + return; + } + + client = pcmk__find_client_by_id(client_id); + if (!client) { + return; + } + + notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); + crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); + crm_xml_add(notify_data, F_STONITH_CALLID, call_id); + crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); + + crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); + + if (client) { + pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); + } + + free_xml(notify_data); +} + +/*! + * \internal + * \brief Notify relevant IPC clients of a fencing operation result + * + * \param[in] type Notification type + * \param[in] result Result of fencing operation (assume success if NULL) + * \param[in] data If not NULL, add to notification as call data + */ +void +fenced_send_notification(const char *type, const pcmk__action_result_t *result, + xmlNode *data) +{ + /* TODO: Standardize the contents of data */ + xmlNode *update_msg = create_xml_node(NULL, "notify"); + + CRM_LOG_ASSERT(type != NULL); + + crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); + crm_xml_add(update_msg, F_SUBTYPE, type); + crm_xml_add(update_msg, F_STONITH_OPERATION, type); + stonith__xe_set_result(update_msg, result); + + if (data != NULL) { + add_message_xml(update_msg, F_STONITH_CALLDATA, data); + } + + crm_trace("Notifying clients"); + pcmk__foreach_ipc_client(stonith_notify_client, update_msg); + free_xml(update_msg); + crm_trace("Notify complete"); +} + +/*! 
+ * \internal
+ * \brief Send notifications for a configuration change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD,
+ *                    STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
+ *                    STONITH_OP_LEVEL_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    Description of what changed
+ * \param[in] active  Current number of devices or topologies in use
+ */
+static void
+send_config_notification(const char *op, const pcmk__action_result_t *result,
+                         const char *desc, int active)
+{
+    xmlNode *notify_data = create_xml_node(NULL, op);
+
+    CRM_CHECK(notify_data != NULL, return);
+
+    crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
+    crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
+
+    fenced_send_notification(op, result, notify_data);
+    free_xml(notify_data);
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a device change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD or
+ *                    STONITH_OP_DEVICE_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    ID of device that changed
+ */
+void
+fenced_send_device_notification(const char *op,
+                                const pcmk__action_result_t *result,
+                                const char *desc)
+{
+    send_config_notification(op, result, desc, g_hash_table_size(device_list));
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a topology level change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_LEVEL_ADD or
+ *                    STONITH_OP_LEVEL_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    String representation of level (<target>[<level_index>])
+ */
+void
+fenced_send_level_notification(const char *op,
+                               const pcmk__action_result_t *result,
+                               const char *desc)
+{
+    send_config_notification(op, result, desc, g_hash_table_size(topology));
+}
+
+static void
+topology_remove_helper(const char *node, int level)
+{
+    char *desc = NULL;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
+
+    crm_xml_add(data, F_STONITH_ORIGIN, __func__);
+    crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
+    crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
+
+    fenced_unregister_level(data, &desc, &result);
+    fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
+    pcmk__reset_result(&result);
+    free_xml(data);
+    free(desc);
+}
+
+static void
+remove_cib_device(xmlXPathObjectPtr xpathObj)
+{
+    int max = numXpathResults(xpathObj), lpc = 0;
+
+    for (lpc = 0; lpc < max; lpc++) {
+        const char *rsc_id = NULL;
+        const char *standard = NULL;
+        xmlNode *match = getXpathResult(xpathObj, lpc);
+
+        CRM_LOG_ASSERT(match != NULL);
+        if(match != NULL) {
+            standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
+        }
+
+        if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+            continue;
+        }
+
+        rsc_id = crm_element_value(match, XML_ATTR_ID);
+
+        stonith_device_remove(rsc_id, true);
+    }
+}
+
+static void
+remove_topology_level(xmlNode *match)
+{
+    int index = 0;
+    char *key = NULL;
+
+    CRM_CHECK(match != NULL, return);
+
+    key = stonith_level_key(match, fenced_target_by_unknown);
+    crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+    topology_remove_helper(key, index);
+    free(key);
+}
+
+static void
+add_topology_level(xmlNode *match)
+{
+    char *desc = NULL;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    CRM_CHECK(match != NULL, return);
+
+    fenced_register_level(match, &desc, &result);
+    fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
+    pcmk__reset_result(&result);
+
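+    // desc was allocated by fenced_register_level() for the notification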
    free(desc);
+}
+
+static void
+remove_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+    int max = numXpathResults(xpathObj), lpc = 0;
+
+    for (lpc = 0; lpc < max; lpc++) {
+        xmlNode *match = getXpathResult(xpathObj, lpc);
+
+        CRM_LOG_ASSERT(match != NULL);
+        if (match && crm_element_value(match, XML_DIFF_MARKER)) {
+            /* Deletion */
+            int index = 0;
+            char *target = stonith_level_key(match, fenced_target_by_unknown);
+
+            crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+            if (target == NULL) {
+                crm_err("Invalid fencing target in element %s", ID(match));
+
+            } else if (index <= 0) {
+                crm_err("Invalid level for %s in element %s", target, ID(match));
+
+            } else {
+                topology_remove_helper(target, index);
+            }
+            /* } else { Deal with modifications during the 'addition' stage */
+        }
+    }
+}
+
+static void
+register_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+    int max = numXpathResults(xpathObj), lpc = 0;
+
+    for (lpc = 0; lpc < max; lpc++) {
+        xmlNode *match = getXpathResult(xpathObj, lpc);
+
+        remove_topology_level(match);
+        add_topology_level(match);
+    }
+}
+
+/* Fencing
+ * Example v1 CIB diff for this topology format: <fencing-level> entries,
+ * with target/index/devices attributes, removed and added beneath
+ * <fencing-topology> and handled by remove_fencing_topology() and
+ * register_fencing_topology() above.
+ */
+
+static void
+fencing_topology_init(void)
+{
+    xmlXPathObjectPtr xpathObj = NULL;
+    const char *xpath = "//" XML_TAG_FENCING_LEVEL;
+
+    crm_trace("Full topology refresh");
+    free_topology_list();
+    init_topology_list();
+
+    /* Grab everything */
+    xpathObj = xpath_search(local_cib, xpath);
+    register_fencing_topology(xpathObj);
+
+    freeXpathObject(xpathObj);
+}
+
+#define rsc_name(x) x->clone_name?x->clone_name:x->id
+
+/*!
+ * \internal
+ * \brief Check whether our uname is in a resource's allowed node list
+ *
+ * \param[in] rsc  Resource to check
+ *
+ * \return Pointer to node object if found, NULL otherwise
+ */
+static pe_node_t *
+our_node_allowed_for(const pe_resource_t *rsc)
+{
+    GHashTableIter iter;
+    pe_node_t *node = NULL;
+
+    if (rsc && stonith_our_uname) {
+        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+        while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+            if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
+                break;
+            }
+            node = NULL;
+        }
+    }
+    return node;
+}
+
+static void
+watchdog_device_update(void)
+{
+    if (stonith_watchdog_timeout_ms > 0) {
+        if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
+            !stonith_watchdog_targets) {
+            /* If we get here, watchdog-fencing is enabled, the device doesn't
+               exist yet, and stonith_watchdog_targets isn't what's preventing
+               its creation
+             */
+            int rc;
+            xmlNode *xml;
+
+            xml = create_device_registration_xml(
+                    STONITH_WATCHDOG_ID,
+                    st_namespace_internal,
+                    STONITH_WATCHDOG_AGENT,
+                    NULL, /* stonith_device_register will add our
+                             own name as PCMK_STONITH_HOST_LIST param
+                             so we can skip that here
+                           */
+                    NULL);
+            rc = stonith_device_register(xml, TRUE);
+            free_xml(xml);
+            if (rc != pcmk_ok) {
+                rc = pcmk_legacy2rc(rc);
+                exit_code = CRM_EX_FATAL;
+                crm_crit("Cannot register watchdog pseudo fence agent: %s",
+                         pcmk_rc_str(rc));
+                stonith_shutdown(0);
+            }
+        }
+
+    } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) {
+        /* be silent if no device - todo parameter to stonith_device_remove */
+        stonith_device_remove(STONITH_WATCHDOG_ID, true);
+    }
+}
+
+static void
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
+{
+    long timeout_ms = 0;
+    xmlNode *stonith_watchdog_xml = NULL;
+    const char *value = NULL;
+
+    stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
+                                            cib, LOG_NEVER);
+    if (stonith_watchdog_xml) {
+        value =
crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); + } + if (value) { + timeout_ms = crm_get_msec(value); + } + + if (timeout_ms < 0) { + timeout_ms = pcmk__auto_watchdog_timeout(); + } + + stonith_watchdog_timeout_ms = timeout_ms; +} + +/*! + * \internal + * \brief If a resource or any of its children are STONITH devices, update their + * definitions given a cluster working set. + * + * \param[in,out] rsc Resource to check + * \param[in,out] data_set Cluster working set with device information + */ +static void +cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) +{ + pe_node_t *node = NULL; + const char *value = NULL; + const char *rclass = NULL; + pe_node_t *parent = NULL; + + /* If this is a complex resource, check children rather than this resource itself. */ + if(rsc->children) { + GList *gIter = NULL; + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + cib_device_update(gIter->data, data_set); + if(pe_rsc_is_clone(rsc)) { + crm_trace("Only processing one copy of the clone %s", rsc->id); + break; + } + } + return; + } + + /* We only care about STONITH resources. */ + rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + return; + } + + /* If this STONITH resource is disabled, remove it. */ + if (pe__resource_is_disabled(rsc)) { + crm_info("Device %s has been disabled", rsc->id); + return; + } + + /* if watchdog-fencing is disabled handle any watchdog-fence + resource as if it was disabled + */ + if ((stonith_watchdog_timeout_ms <= 0) && + pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { + crm_info("Watchdog-fencing disabled thus handling " + "device %s as disabled", rsc->id); + return; + } + + /* Check whether our node is allowed for this resource (and its parent if in a group) */ + node = our_node_allowed_for(rsc); + if (rsc->parent && (rsc->parent->variant == pe_group)) { + parent = our_node_allowed_for(rsc->parent); + } + + if(node == NULL) { + /* Our node is disallowed, so remove the device */ + GHashTableIter iter; + + crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + crm_trace("Available: %s = %d", pe__node_name(node), node->weight); + } + + return; + + } else if(node->weight < 0 || (parent && parent->weight < 0)) { + /* Our node (or its group) is disallowed by score, so remove the device */ + int score = (node->weight < 0)? 
node->weight : parent->weight; + + crm_info("Device %s has been disabled on %s: score=%s", + rsc->id, stonith_our_uname, pcmk_readable_score(score)); + return; + + } else { + /* Our node is allowed, so update the device information */ + int rc; + xmlNode *data; + GHashTable *rsc_params = NULL; + GHashTableIter gIter; + stonith_key_value_t *params = NULL; + + const char *name = NULL; + const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); + const char *rsc_provides = NULL; + + crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); + rsc_params = pe_rsc_params(rsc, node, data_set); + get_meta_attributes(rsc->meta, rsc, node, data_set); + + rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); + + g_hash_table_iter_init(&gIter, rsc_params); + while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { + if (!name || !value) { + continue; + } + params = stonith_key_value_add(params, name, value); + crm_trace(" %s=%s", name, value); + } + + data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, + agent, params, rsc_provides); + stonith_key_value_freeall(params, 1, 1); + rc = stonith_device_register(data, TRUE); + CRM_ASSERT(rc == pcmk_ok); + free_xml(data); + } +} + +/*! + * \internal + * \brief Update all STONITH device definitions based on current CIB + */ +static void +cib_devices_update(void) +{ + GHashTableIter iter; + stonith_device_t *device = NULL; + + crm_info("Updating devices to version %s.%s.%s", + crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(local_cib, XML_ATTR_GENERATION), + crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); + + if (fenced_data_set->now != NULL) { + crm_time_free(fenced_data_set->now); + fenced_data_set->now = NULL; + } + fenced_data_set->localhost = stonith_our_uname; + pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->cib_registered) { + device->dirty = TRUE; + } + } + + /* have list repopulated if cib has a watchdog-fencing-resource + TODO: keep a cached list for queries happening while we are refreshing + */ + g_list_free_full(stonith_watchdog_targets, free); + stonith_watchdog_targets = NULL; + g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->dirty) { + g_hash_table_iter_remove(&iter); + } + } + + fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it + pe_reset_working_set(fenced_data_set); +} + +static void +update_cib_stonith_devices_v2(const char *event, xmlNode * msg) +{ + xmlNode *change = NULL; + char *reason = NULL; + bool needs_update = FALSE; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + const char *shortpath = NULL; + + if ((op == NULL) || + (strcmp(op, "move") == 0) || + strstr(xpath, "/"XML_CIB_TAG_STATUS)) { + continue; + } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { + const char *rsc_id = NULL; + char *search = NULL; + char *mutable = NULL; + + if (strstr(xpath, XML_TAG_ATTR_SETS) || 
+ strstr(xpath, XML_TAG_META_SETS)) { + needs_update = TRUE; + pcmk__str_update(&reason, + "(meta) attribute deleted from resource"); + break; + } + pcmk__str_update(&mutable, xpath); + rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); + if (rsc_id != NULL) { + rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); + search = strchr(rsc_id, '\''); + } + if (search != NULL) { + *search = 0; + stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { + crm_warn("Ignoring malformed CIB update (resource deletion)"); + } + free(mutable); + + } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || + strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || + strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { + shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); + reason = crm_strdup_printf("%s %s", op, shortpath+1); + needs_update = TRUE; + break; + } + } + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } else { + crm_trace("No updates for device list found in CIB"); + } + free(reason); +} + + +static void +update_cib_stonith_devices_v1(const char *event, xmlNode * msg) +{ + const char *reason = "none"; + gboolean needs_update = FALSE; + xmlXPathObjectPtr xpath_obj = NULL; + + /* process new constraints */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + /* Safest and simplest to always recompute */ + needs_update = TRUE; + reason = "new location constraint"; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpath_obj, lpc); + + crm_log_xml_trace(match, "new constraint"); + } + } + freeXpathObject(xpath_obj); + + /* process deletions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + remove_cib_device(xpath_obj); + } + freeXpathObject(xpath_obj); + + /* process additions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpath_obj, lpc); + + rsc_id = crm_element_value(match, XML_ATTR_ID); + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + crm_trace("Fencing resource %s was added or modified", rsc_id); + reason = "new resource"; + needs_update = TRUE; + } + } + freeXpathObject(xpath_obj); + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } +} + +static void +update_cib_stonith_devices(const char *event, xmlNode * msg) +{ + int format = 1; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, "format", &format); + switch(format) { + case 1: + update_cib_stonith_devices_v1(event, msg); + break; + case 2: + update_cib_stonith_devices_v2(event, msg); + break; + default: + crm_warn("Unknown patch format: %d", format); + } +} + +/*! 
+ * \internal + * \brief Check whether a node has a specific attribute name/value + * + * \param[in] node Name of node to check + * \param[in] name Name of an attribute to look for + * \param[in] value The value the named attribute needs to be set to in order to be considered a match + * + * \return TRUE if the locally cached CIB has the specified node attribute + */ +gboolean +node_has_attr(const char *node, const char *name, const char *value) +{ + GString *xpath = NULL; + xmlNode *match; + + CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) + && (value != NULL), return FALSE); + + /* Search for the node's attributes in the CIB. While the schema allows + * multiple sets of instance attributes, and allows instance attributes to + * use id-ref to reference values elsewhere, that is intended for resources, + * so we ignore that here. + */ + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS + "/" XML_CIB_TAG_NVPAIR + "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " + "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); + + match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); + + g_string_free(xpath, TRUE); + return (match != NULL); +} + +/*! + * \internal + * \brief Check whether a node does watchdog-fencing + * + * \param[in] node Name of node to check + * + * \return TRUE if node found in stonith_watchdog_targets + * or stonith_watchdog_targets is empty indicating + * all nodes are doing watchdog-fencing + */ +gboolean +node_does_watchdog_fencing(const char *node) +{ + return ((stonith_watchdog_targets == NULL) || + pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); +} + + +static void +update_fencing_topology(const char *event, xmlNode * msg) +{ + int format = 1; + const char *xpath; + xmlXPathObjectPtr xpathObj = NULL; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, "format", &format); + + if(format == 1) { + /* Process deletions (only) */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + remove_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + /* Process additions and changes */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + register_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + } else if(format == 2) { + xmlNode *change = NULL; + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + + xml_patch_versions(patchset, add, del); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + if(op == NULL) { + continue; + + } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { + /* Change to a specific entry */ + + crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); + if(strcmp(op, "move") == 0) { + continue; + + } else if(strcmp(op, "create") == 0) { + add_topology_level(change->children); + + } else if(strcmp(op, "modify") == 0) { + xmlNode *match = first_named_child(change, XML_DIFF_RESULT); + + if(match) { + remove_topology_level(match->children); + add_topology_level(match->children); + } + + } else if(strcmp(op, "delete") == 0) { + /* Nuclear option, all 
we have is the path and an id... not enough to remove a specific entry */
+                    crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
+                             op, add[0], add[1], add[2], xpath);
+                    fencing_topology_init();
+                    return;
+                }
+
+            } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
+                /* Change to the topology in general */
+                crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
+                         op, add[0], add[1], add[2], xpath);
+                fencing_topology_init();
+                return;
+
+            } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
+                /* Changes to the whole config section, possibly including the topology as a whole */
+                if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
+                    crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
+                              op, add[0], add[1], add[2], xpath);
+
+                } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
+                    crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
+                             op, add[0], add[1], add[2], xpath);
+                    fencing_topology_init();
+                    return;
+                }
+
+            } else {
+                crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
+                          op, add[0], add[1], add[2], xpath);
+            }
+        }
+
+    } else {
+        crm_warn("Unknown patch format: %d", format);
+    }
+}
+
+static bool have_cib_devices = FALSE;
+
+static void
+update_cib_cache_cb(const char *event, xmlNode * msg)
+{
+    int rc = pcmk_ok;
+    long timeout_ms_saved = stonith_watchdog_timeout_ms;
+    bool need_full_refresh = false;
+
+    if(!have_cib_devices) {
+        crm_trace("Skipping updates until we get a full dump");
+        return;
+
+    } else if(msg == NULL) {
+        crm_trace("Missing %s update", event);
+        return;
+    }
+
+    /* Maintain a local copy of the CIB so that we have full access
+     * to device definitions, location constraints, and node attributes
+     */
+    if (local_cib != NULL) {
+        int rc = pcmk_ok;
+        xmlNode *patchset = NULL;
+
+        crm_element_value_int(msg, F_CIB_RC, &rc);
+        if (rc != pcmk_ok) {
+            return;
+        }
+
+        patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+        pcmk__output_set_log_level(logger_out, LOG_TRACE);
+        out->message(out, "xml-patchset", patchset);
+        rc = xml_apply_patchset(local_cib, patchset, TRUE);
+        switch (rc) {
+            case pcmk_ok:
+            case -pcmk_err_old_data:
+                break;
+            case -pcmk_err_diff_resync:
+            case -pcmk_err_diff_failed:
+                crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
+                free_xml(local_cib);
+                local_cib = NULL;
+                break;
+            default:
+                crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
+                free_xml(local_cib);
+                local_cib = NULL;
+        }
+    }
+
+    if (local_cib == NULL) {
+        crm_trace("Re-requesting full CIB");
+        rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
+        if(rc != pcmk_ok) {
+            crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
+            return;
+        }
+        CRM_ASSERT(local_cib != NULL);
+        need_full_refresh = true;
+    }
+
+    pcmk__refresh_node_caches_from_cib(local_cib);
+    update_stonith_watchdog_timeout_ms(local_cib);
+
+    if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
+        need_full_refresh = true;
+    }
+
+    if (need_full_refresh) {
+        fencing_topology_init();
+        cib_devices_update();
+    } else {
+        // Partial refresh
+        update_fencing_topology(event, msg);
+        update_cib_stonith_devices(event, msg);
+    }
+
+    watchdog_device_update();
+}
+
+static void
+init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+    crm_info("Updating device list from CIB");
+    have_cib_devices = TRUE;
+    local_cib = copy_xml(output);
+
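+    // Prime node caches, the watchdog timeout, topology, and devices from
+    // the initial full CIB copy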
pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + fencing_topology_init(); + cib_devices_update(); + watchdog_device_update(); +} + +static void +stonith_shutdown(int nsig) +{ + crm_info("Terminating with %d clients", pcmk__ipc_client_count()); + stonith_shutdown_flag = TRUE; + if (mainloop != NULL && g_main_loop_is_running(mainloop)) { + g_main_loop_quit(mainloop); + } +} + +static void +cib_connection_destroy(gpointer user_data) +{ + if (stonith_shutdown_flag) { + crm_info("Connection to the CIB manager closed"); + return; + } else { + crm_crit("Lost connection to the CIB manager, shutting down"); + } + if (cib_api) { + cib_api->cmds->signoff(cib_api); + } + stonith_shutdown(0); +} + +static void +stonith_cleanup(void) +{ + if (cib_api) { + cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); + cib_api->cmds->signoff(cib_api); + } + + if (ipcs) { + qb_ipcs_destroy(ipcs); + } + + crm_peer_destroy(); + pcmk__client_cleanup(); + free_stonith_remote_op_list(); + free_topology_list(); + free_device_list(); + free_metadata_cache(); + fenced_unregister_handlers(); + + free(stonith_our_uname); + stonith_our_uname = NULL; + + free_xml(local_cib); + local_cib = NULL; +} + +static gboolean +stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, + GError **error) +{ + stand_alone = FALSE; + options.no_cib_connect = true; + return TRUE; +} + +static void +setup_cib(void) +{ + int rc, retries = 0; + + cib_api = cib_new(); + if (cib_api == NULL) { + crm_err("No connection to the CIB manager"); + return; + } + + do { + sleep(retries); + rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); + } while (rc == -ENOTCONN && ++retries < 5); + + if (rc != pcmk_ok) { + crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); + + } else if (pcmk_ok != + cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { + crm_err("Could not set CIB notification callback"); + + } else { + rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); + cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", + init_cib_cache_cb); + cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); + crm_info("Watching for fencing topology changes"); + } +} + +struct qb_ipcs_service_handlers ipc_callbacks = { + .connection_accept = st_ipc_accept, + .connection_created = NULL, + .msg_process = st_ipc_dispatch, + .connection_closed = st_ipc_closed, + .connection_destroyed = st_ipc_destroy +}; + +/*! 
+ * \internal + * \brief Callback for peer status changes + * + * \param[in] type What changed + * \param[in] node Peer that had the change + * \param[in] data Previous value of what changed + */ +static void +st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) +{ + if ((type != crm_status_processes) + && !pcmk_is_set(node->flags, crm_remote_node)) { + /* + * This is a hack until we can send to a nodeid and/or we fix node name lookups. + * These messages are ignored in stonith_peer_callback() + */ + xmlNode *query = create_xml_node(NULL, "stonith_command"); + + crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); + crm_xml_add(query, F_TYPE, T_STONITH_NG); + crm_xml_add(query, F_STONITH_OPERATION, "poke"); + + crm_debug("Broadcasting our uname because of node %u", node->id); + send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); + + free_xml(query); + } +} + +static pcmk__cluster_option_t fencer_options[] = { + /* name, old name, type, allowed values, + * default value, validator, + * short description, + * long description + */ + { + PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, + N_("Advanced use only: An alternate parameter to supply instead of 'port'"), + N_("Some devices do not support the " + "standard 'port' parameter or may provide additional ones. Use " + "this to specify an alternate, device-specific, parameter " + "that should indicate the machine to be fenced. A value of " + "none can be used to tell the cluster not to supply any " + "additional parameters.") + }, + { + PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL, + N_("A mapping of host names to port numbers for devices that do not support host names."), + N_("E.g. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2") + }, + { + PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL, + N_("A list of machines controlled by this device (optional unless pcmk_host_check=static-list)"), + N_("E.g. node1,node2,node3") + }, + { + PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL, + N_("How to determine which machines are controlled by the device."), + N_("Allowed values: dynamic-list " + "(query the device via the 'list' command), static-list " + "(check the pcmk_host_list attribute), status " + "(query the device via the 'status' command), " + "none (assume every device can fence every " + "machine)") + }, + { + PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL, + N_("Enable a delay of no more than the time specified before executing fencing actions."), + N_("Enable a delay of no more than the " + "time specified before executing fencing actions. Pacemaker " + "derives the overall delay by taking the value of " + "pcmk_delay_base and adding a random delay value such " + "that the sum is kept below this maximum.") + }, + { + PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL, + N_("Enable a base delay for " + "fencing actions and specify base delay value."), + N_("This enables a static delay for " + "fencing actions, which can help avoid \"death matches\" where " + "two nodes try to fence each other at the same time. If " + "pcmk_delay_max is also used, a random delay will be " + "added such that the total delay is kept below that value. "
+ "This can be set to a single time value to apply to any node " + "targeted by this device (useful if a separate device is " + "configured for each target), or to a node map (for example, " + "\"node1:1s;node2:5\") to set a different value per target.") + }, + { + PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL, + N_("The maximum number of actions can be performed in parallel on this device"), + N_("Cluster property concurrent-fencing=true needs to be configured first." + "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.") + }, + { + "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + }, + { + "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.") + }, + { + "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.") + }, + { + "pcmk_off_action",NULL, "string", NULL, "off", NULL, + N_("Advanced use only: An alternate command to run instead of \'off\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + }, + { + "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'off\' actions.") + }, + { + "pcmk_off_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.") + }, + { + "pcmk_on_action",NULL, "string", NULL, "on", NULL, + N_("Advanced use only: An alternate command to run instead of 'on'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + }, + { + "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." 
+ "Use this to specify an alternate, device-specific, timeout for \'on\' actions.") + }, + { + "pcmk_on_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.") + }, + { + "pcmk_list_action",NULL, "string", NULL, "list", NULL, + N_("Advanced use only: An alternate command to run instead of \'list\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + }, + { + "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'list\' actions.") + }, + { + "pcmk_list_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.") + }, + { + "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL, + N_("Advanced use only: An alternate command to run instead of \'monitor\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") + }, + { + "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.") + }, + { + "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.") + }, + { + "pcmk_status_action",NULL, "string", NULL, "status", NULL, + N_("Advanced use only: An alternate command to run instead of \'status\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + }, + { + "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." 
+ "Use this to specify an alternate, device-specific, timeout for \'status\' actions.") + }, + { + "pcmk_status_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.") + }, +}; + +void +fencer_metadata(void) +{ + const char *desc_short = N_("Instance attributes available for all " + "\"stonith\"-class resources"); + const char *desc_long = N_("Instance attributes available for all \"stonith\"-" + "class resources and used by Pacemaker's fence " + "daemon, formerly known as stonithd"); + + gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short, + desc_long, fencer_options, + PCMK__NELEM(fencer_options)); + printf("%s", s); + g_free(s); +} + +static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "Deprecated (will be removed in a future release)", NULL }, + + { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, + stand_alone_cpg_cb, "Intended for use in regression testing only", NULL }, + + { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, + &options.log_files, "Send logs to the additional named logfile", NULL }, + + { NULL } +}; + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) +{ + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, + "[metadata]"); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_cluster_t *cluster = NULL; + crm_ipc_t *old_instance = NULL; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); + GOptionContext *context = build_arg_context(args, &output_group); + + crm_log_preinit(NULL, argc, argv); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { + fencer_metadata(); + goto done; + } + + // Open additional log files + pcmk__add_logfiles(options.log_files, out); + + crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, + (args->verbosity > 0), argc, argv, FALSE); + + crm_notice("Starting Pacemaker fencer"); + + old_instance = crm_ipc_new("stonith-ng", 0); + if (old_instance == NULL) { + /* crm_ipc_new() will have already logged an error message with + * crm_err() + */ + exit_code = CRM_EX_FATAL; + goto done; + } + + if (crm_ipc_connect(old_instance)) { + // IPC endpoint already up + crm_ipc_close(old_instance); + crm_ipc_destroy(old_instance); + crm_err("pacemaker-fenced is already 
active, aborting startup"); + goto done; + } else { + // Not up or not authentic, we'll proceed either way + crm_ipc_destroy(old_instance); + old_instance = NULL; + } + + mainloop_add_signal(SIGTERM, stonith_shutdown); + + crm_peer_init(); + + fenced_data_set = pe_new_working_set(); + CRM_ASSERT(fenced_data_set != NULL); + + cluster = pcmk_cluster_new(); + + /* Initialize the logger prior to setup_cib(). update_cib_cache_cb() may + * call the "xml-patchset" message function, which needs the logger, after + * setup_cib() has run. + */ + rc = pcmk__log_output_new(&logger_out); + if (rc != pcmk_rc_ok) { + exit_code = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Error creating output format log: %s", pcmk_rc_str(rc)); + goto done; + } + pe__register_messages(logger_out); + pcmk__register_lib_messages(logger_out); + pcmk__output_set_log_level(logger_out, LOG_TRACE); + fenced_data_set->priv = logger_out; + + if (!stand_alone) { +#if SUPPORT_COROSYNC + if (is_corosync_cluster()) { + cluster->destroy = stonith_peer_cs_destroy; + cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; + cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; + } +#endif // SUPPORT_COROSYNC + + crm_set_status_callback(&st_peer_update_callback); + + if (crm_cluster_connect(cluster) == FALSE) { + exit_code = CRM_EX_FATAL; + crm_crit("Cannot sign in to the cluster... terminating"); + goto done; + } + pcmk__str_update(&stonith_our_uname, cluster->uname); + + if (!options.no_cib_connect) { + setup_cib(); + } + + } else { + pcmk__str_update(&stonith_our_uname, "localhost"); + crm_warn("Stand-alone mode is deprecated and will be removed " + "in a future release"); + } + + init_device_list(); + init_topology_list(); + + pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); + + // Create the mainloop and run it... + mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker fencer successfully started and accepting connections"); + g_main_loop_run(mainloop); + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + g_strfreev(options.log_files); + + stonith_cleanup(); + pcmk_cluster_free(cluster); + pe_free_working_set(fenced_data_set); + + pcmk__output_and_clear_error(&error, out); + + if (logger_out != NULL) { + logger_out->finish(logger_out, exit_code, true, NULL); + pcmk__output_free(logger_out); + } + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + + pcmk__unregister_formats(); + crm_exit(exit_code); +} diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h new file mode 100644 index 0000000..a3d2e17 --- /dev/null +++ b/daemons/fenced/pacemaker-fenced.h @@ -0,0 +1,315 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <stdint.h> // uint32_t, uint64_t +#include <libxml/tree.h> // xmlNode + +/*! + * \internal + * \brief Check whether target has already been fenced recently + * + * \param[in] tolerance Number of seconds to look back in time + * \param[in] target Name of node to search for + * \param[in] action Action we want to match + * + * \return TRUE if an equivalent fencing operation took place in the last + * \p tolerance seconds, FALSE otherwise + */ +gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); + +typedef struct stonith_device_s { + char *id; + char *agent; + char *namespace; + + /*!
List of actions that must execute on the target node. Used for unfencing */ + GString *on_target_actions; + GList *targets; + time_t targets_age; + gboolean has_attr_map; + + // Whether target's nodeid should be passed as a parameter to the agent + gboolean include_nodeid; + + /* whether the cluster should automatically unfence nodes with the device */ + gboolean automatic_unfencing; + guint priority; + + uint32_t flags; // Group of enum st_device_flags + + GHashTable *params; + GHashTable *aliases; + GList *pending_ops; + mainloop_timer_t *timer; + crm_trigger_t *work; + xmlNode *agent_metadata; + + /*! A verified device is one that has contacted the + * agent successfully to perform a monitor operation */ + gboolean verified; + + gboolean cib_registered; + gboolean api_registered; + gboolean dirty; +} stonith_device_t; + +/* These values are used to index certain arrays by "phase". Usually an + * operation has only one "phase", so phase is always zero. However, some + * reboots are remapped to "off" then "on", in which case "reboot" will be + * phase 0, "off" will be phase 1 and "on" will be phase 2. + */ +enum st_remap_phase { + st_phase_requested = 0, + st_phase_off = 1, + st_phase_on = 2, + st_phase_max = 3 +}; + +typedef struct remote_fencing_op_s { + /* The unique id associated with this operation */ + char *id; + /*! The node this operation will fence */ + char *target; + /*! The fencing action to perform on the target (reboot, on, off) */ + char *action; + + /*! When was the fencing action recorded (seconds since epoch) */ + time_t created; + + /*! Whether the final notifications have been sent to local stonith clients */ + gboolean notify_sent; + /*! The number of query replies received */ + guint replies; + /*! The number of query replies expected */ + guint replies_expected; + /*! Does this node own control of this operation */ + gboolean owner; + /*! After the query is complete, this is the high-level timer that expires the entire operation */ + guint op_timer_total; + /*! This timer expires the current fencing request. Many fencing + * requests may exist in a single operation */ + guint op_timer_one; + /*! This timer expires the query request sent out to determine + * which nodes contain which devices, and whom those devices can fence */ + guint query_timer; + /*! This is the default timeout to use for each fencing device if no + * custom timeout is received in the query. */ + gint base_timeout; + /*! This is the calculated total timeout an operation can take before + * expiring. This is calculated by adding together all the timeout + * values associated with the devices this fencing operation may call */ + gint total_timeout; + + /*! Requested fencing delay. + * Value -1 means disable any static/random fencing delays. */ + int delay; + + /*! Delegate is the node being asked to perform a fencing action + * on behalf of the node that owns the remote operation. Some operations + * will involve multiple delegates. This value represents the final delegate + * that is used. */ + char *delegate; + /*! The point at which the remote operation completed */ + time_t completed; + //! Group of enum stonith_call_options associated with this operation + uint32_t call_options; + + /*! The current state of the remote operation. This indicates + * what stage the op is in: query, exec, done, duplicate, or failed. */ + enum op_state state; + /*! The node that owns the remote operation */ + char *originator; + /*! The local client id that initiated the fencing request */ + char *client_id; + /*!
The client's call_id that initiated the fencing request */ + int client_callid; + /*! The name of the client that initiated the fencing request */ + char *client_name; + /*! List of the received query results for all the nodes in the cpg group */ + GList *query_results; + /*! The original request that initiated the remote stonith operation */ + xmlNode *request; + + /*! The current topology level being executed */ + guint level; + /*! The current operation phase being executed */ + enum st_remap_phase phase; + + /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ + GList *automatic_list; + /*! List of all devices at the currently executing topology level */ + GList *devices_list; + /*! Current entry in the topology device list */ + GList *devices; + + /*! List of duplicate operations attached to this operation. Once this operation + * completes, the duplicate operations will be closed out as well. */ + GList *duplicates; + + /*! The point at which the remote operation completed (nsec) */ + long long completed_nsec; + + /*! The (potentially intermediate) result of the operation */ + pcmk__action_result_t result; +} remote_fencing_op_t; + +void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); + +// Fencer-specific client flags +enum st_client_flags { + st_callback_unknown = UINT64_C(0), + st_callback_notify_fence = (UINT64_C(1) << 0), + st_callback_device_add = (UINT64_C(1) << 2), + st_callback_device_del = (UINT64_C(1) << 4), + st_callback_notify_history = (UINT64_C(1) << 5), + st_callback_notify_history_synced = (UINT64_C(1) << 6) +}; + +// How the user specified the target of a topology level +enum fenced_target_by { + fenced_target_by_unknown = -1, // Invalid or not yet parsed + fenced_target_by_name, // By target name + fenced_target_by_pattern, // By a pattern matching target names + fenced_target_by_attribute, // By a node attribute/value on target +}; + +/* + * Complex fencing requirements are specified via fencing topologies. + * A topology consists of levels; each level is a list of fencing devices. + * Topologies are stored in a hash table by node name. When a node needs to be + * fenced, if it has an entry in the topology table, the levels are tried + * sequentially, and the devices in each level are tried sequentially. + * Fencing is considered successful as soon as any level succeeds; + * a level is considered successful if all its devices succeed. + * Essentially, all devices at a given level are "and-ed" and the + * levels are "or-ed". + * + * This structure is used for the topology table entries. + * Topology levels start from 1, so levels[0] is unused and always NULL. + */ +typedef struct stonith_topology_s { + enum fenced_target_by kind; // How target was specified + + /*! Node name regex or attribute name=value for which topology applies */ + char *target; + char *target_value; + char *target_pattern; + char *target_attribute; + + /*!
Names of fencing devices at each topology level */ + GList *levels[ST_LEVEL_MAX]; + +} stonith_topology_t; + +void init_device_list(void); +void free_device_list(void); +void init_topology_list(void); +void free_topology_list(void); +void free_stonith_remote_op_list(void); +void init_stonith_remote_op_hash_table(GHashTable **table); +void free_metadata_cache(void); +void fenced_unregister_handlers(void); + +uint64_t get_stonith_flag(const char *name); + +void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *op_request, const char *remote_peer); + +int stonith_device_register(xmlNode *msg, gboolean from_cib); + +void stonith_device_remove(const char *id, bool from_cib); + +char *stonith_level_key(const xmlNode *msg, enum fenced_target_by); +void fenced_register_level(xmlNode *msg, char **desc, + pcmk__action_result_t *result); +void fenced_unregister_level(xmlNode *msg, char **desc, + pcmk__action_result_t *result); + +stonith_topology_t *find_topology_for_host(const char *host); + +void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, + int call_options); + +xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, + const pcmk__action_result_t *result); + +void + do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); + +void fenced_send_notification(const char *type, + const pcmk__action_result_t *result, + xmlNode *data); +void fenced_send_device_notification(const char *op, + const pcmk__action_result_t *result, + const char *desc); +void fenced_send_level_notification(const char *op, + const pcmk__action_result_t *result, + const char *desc); + +remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, + xmlNode *request, + gboolean manual_ack); + +void fenced_process_fencing_reply(xmlNode *msg); + +int process_remote_stonith_query(xmlNode * msg); + +void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); + +void stonith_fence_history(xmlNode *msg, xmlNode **output, + const char *remote_peer, int options); + +void stonith_fence_history_trim(void); + +bool fencing_peer_active(crm_node_t *peer); + +void set_fencing_completed(remote_fencing_op_t * op); + +int fenced_handle_manual_confirmation(const pcmk__client_t *client, + xmlNode *msg); +void fencer_metadata(void); + +const char *fenced_device_reboot_action(const char *device_id); +bool fenced_device_supports_on(const char *device_id); + +gboolean node_has_attr(const char *node, const char *name, const char *value); + +gboolean node_does_watchdog_fencing(const char *node); + +static inline void +fenced_set_protocol_error(pcmk__action_result_t *result) +{ + pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Fencer API request missing required information (bug?)"); +} + +/*! 
+ * \internal + * \brief Get the device flag to use with a given action when searching devices + * + * \param[in] action Action to check + * + * \return st_device_supports_on if \p action is "on", otherwise + * st_device_supports_none + */ +static inline uint32_t +fenced_support_flag(const char *action) +{ + if (pcmk__str_eq(action, "on", pcmk__str_none)) { + return st_device_supports_on; + } + return st_device_supports_none; +} + +extern char *stonith_our_uname; +extern gboolean stand_alone; +extern GHashTable *device_list; +extern GHashTable *topology; +extern long stonith_watchdog_timeout_ms; +extern GList *stonith_watchdog_targets; + +extern GHashTable *stonith_remote_op_list; diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am new file mode 100644 index 0000000..fc0e014 --- /dev/null +++ b/daemons/pacemakerd/Makefile.am @@ -0,0 +1,37 @@ +# +# Copyright 2004-2021 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +sbin_PROGRAMS = pacemakerd + +if BUILD_SYSTEMD +systemdsystemunit_DATA = pacemaker.service +endif + +EXTRA_DIST = pacemakerd.8.inc + +## SOURCES + +noinst_HEADERS = pacemakerd.h + +pacemakerd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemakerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la +pacemakerd_LDADD += $(CLUSTERLIBS) +pacemakerd_SOURCES = pacemakerd.c +if BUILD_CS_SUPPORT +pacemakerd_SOURCES += pcmkd_corosync.c +endif +pacemakerd_SOURCES += pcmkd_messages.c +pacemakerd_SOURCES += pcmkd_subdaemons.c + +CLEANFILES = $(man8_MANS) diff --git a/daemons/pacemakerd/pacemaker.combined.upstart.in b/daemons/pacemakerd/pacemaker.combined.upstart.in new file mode 100644 index 0000000..af59ff0 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.combined.upstart.in @@ -0,0 +1,67 @@ +# pacemaker-corosync - High-Availability cluster +# +# Starts Corosync cluster engine and Pacemaker cluster manager. + +# if you use automatic start, uncomment the line below. +#start on started local and runlevel [2345] + +stop on runlevel [0123456] +kill timeout 3600 +respawn + +env prog=pacemakerd +env sysconf=@CONFIGDIR@/pacemaker +env rpm_lockdir=@localstatedir@/lock/subsys +env deb_lockdir=@localstatedir@/lock + +script + [ -f "$sysconf" ] && . "$sysconf" + exec $prog +end script + +pre-start script + pidof corosync || start corosync + + # if you use corosync-notifyd, uncomment the line below. + #start corosync-notifyd + + # give it time to fail. + sleep 2 + pidof corosync || { exit 1; } + + # if you use crm_mon, uncomment the line below. + #start crm_mon +end script + +post-start script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + touch "$LOCK_FILE" + pidof $prog > "@localstatedir@/run/$prog.pid" +end script + +post-stop script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + rm -f "$LOCK_FILE" + rm -f "@localstatedir@/run/$prog.pid" + + # if you use corosync-notifyd, uncomment the line below. 
+ #stop corosync-notifyd || true + + # if you use corosync's watchdog, uncomment the line below. + #pidof corosync || false + + pidof pacemaker-controld || stop corosync + + # if you want corosync's watchdog to reboot the machine when pacemakerd + # disappears unexpectedly, uncomment the line below + # and disable the "respawn" stanza above. + #pidof pacemaker-controld && killall -q -9 corosync + + # if you use crm_mon, uncomment the line below. + #stop crm_mon + +end script diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in new file mode 100644 index 0000000..3fd53d9 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.service.in @@ -0,0 +1,103 @@ +[Unit] +Description=Pacemaker High Availability Cluster Manager +Documentation=man:pacemakerd +Documentation=https://clusterlabs.org/pacemaker/doc/ + +# DefaultDependencies takes care of sysinit.target, +# basic.target, and shutdown.target + +# We need networking to bind to a network address. It is recommended not to +# use Wants or Requires with network.target, and not to use +# network-online.target for server daemons. +After=network.target + +# Time syncs can make the clock jump backward, which messes with logging +# and failure timestamps, so wait until it's done. +After=time-sync.target + +# Managing systemd resources requires DBus. +After=dbus.service +Wants=dbus.service + +# Some OCF resources may have dependencies that aren't managed by the cluster; +# these must be started before Pacemaker and stopped after it. The +# resource-agents package provides this target, which lets system administrators +# add drop-ins for those dependencies. +After=resource-agents-deps.target +Wants=resource-agents-deps.target + +After=syslog.service +After=rsyslog.service +After=corosync.service +Requires=corosync.service + +# If Pacemaker respawns repeatedly, give up after this many tries in this time +StartLimitBurst=5 +StartLimitIntervalSec=25s + +[Install] +WantedBy=multi-user.target + + +[Service] +Type=simple +KillMode=process +NotifyAccess=main +EnvironmentFile=-@CONFIGDIR@/pacemaker +EnvironmentFile=-@CONFIGDIR@/sbd +SuccessExitStatus=100 + +ExecStart=@sbindir@/pacemakerd + +# Systemd v227 and above can limit the number of processes spawned by a +# service. That is a bad idea for an HA cluster resource manager, so disable it +# by default. The administrator can create a local override if they really want +# a limit. If your systemd version does not support TasksMax, and you want to +# get rid of the resulting log warnings, comment out this option. +TasksMax=infinity + +# If pacemakerd doesn't stop, it's probably waiting on a cluster +# resource. Sending -KILL will just get the node fenced +SendSIGKILL=no + +# Systemd's default of respawning a failed service after 100ms is too aggressive +RestartSec=1s + +# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the +# admin wants to stop the cluster while pacemakerd is not running, it +# might be a good idea to enable the ExecStopPost directive below. +# +# However, the node will likely end up being fenced as a result, so it's +# not enabled by default.
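+# (If enabled, the directive below sends SIGTERM to any Pacemaker +# subdaemons still running after pacemakerd itself has exited.)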
+# +# ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd pacemaker-based \ +# pacemaker-controld pacemaker-execd pacemaker-fenced \ +# pacemaker-schedulerd + +# If you want Corosync to stop whenever Pacemaker is stopped, +# uncomment the next line too: +# +# ExecStopPost=/bin/sh -c 'pidof pacemaker-controld || killall -TERM corosync' + +# Pacemaker will restart along with Corosync if Corosync is stopped while +# Pacemaker is running. +# In this case, if you always want the node to be fenced (that is, if you +# do not want Pacemaker to restart), uncomment the ExecStopPost below. +# +# ExecStopPost=/bin/sh -c 'pidof corosync || \ +# /usr/bin/systemctl --no-block stop pacemaker' + +# When the service functions properly, it will wait to exit until all resources +# have been stopped on the local node, and potentially across all nodes that +# are shutting down. The default of 30min should cover most typical cluster +# configurations, but it may need an increase to adapt to local conditions +# (e.g. a large, clustered database could conceivably take longer to stop). +TimeoutStopSec=30min +TimeoutStartSec=60s + +# Restart options include: no, on-success, on-failure, on-abort, or always +Restart=on-failure + +# crm_perror() writes directly to stderr, so ignore it here +# to avoid double-logging with the wrong format +StandardError=null diff --git a/daemons/pacemakerd/pacemaker.upstart.in b/daemons/pacemakerd/pacemaker.upstart.in new file mode 100644 index 0000000..7a54bc0 --- /dev/null +++ b/daemons/pacemakerd/pacemaker.upstart.in @@ -0,0 +1,33 @@ +# pacemaker - High-Availability cluster resource manager +# +# Starts pacemakerd + +stop on runlevel [0123456] +kill timeout 3600 +respawn + +env prog=pacemakerd +env sysconf=@CONFIGDIR@/pacemaker +env rpm_lockdir=@localstatedir@/lock/subsys +env deb_lockdir=@localstatedir@/lock + +script + [ -f "$sysconf" ] && . "$sysconf" + exec $prog +end script + +post-start script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + touch "$LOCK_FILE" + pidof $prog > "@localstatedir@/run/$prog.pid" +end script + +post-stop script + [ -f "$sysconf" ] && . "$sysconf" + [ -z "$LOCK_FILE" -a -d "$rpm_lockdir" ] && LOCK_FILE="$rpm_lockdir/pacemaker" + [ -z "$LOCK_FILE" -a -d "$deb_lockdir" ] && LOCK_FILE="$deb_lockdir/pacemaker" + rm -f "$LOCK_FILE" + rm -f "@localstatedir@/run/$prog.pid" +end script diff --git a/daemons/pacemakerd/pacemakerd.8.inc b/daemons/pacemakerd/pacemakerd.8.inc new file mode 100644 index 0000000..902af4e --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.8.inc @@ -0,0 +1,5 @@ +[synopsis] +pacemakerd [options] + +/subsidiary Pacemaker daemons/ +.SH OPTIONS diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c new file mode 100644 index 0000000..9f77ccc --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.c @@ -0,0 +1,483 @@ +/* + * Copyright 2010-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */ + +#include +#include "pacemakerd.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* indirectly: CRM_EX_* */ +#include +#include +#include +#include +#include +#include +#include + +#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons" + +struct { + gboolean features; + gboolean foreground; + gboolean shutdown; + gboolean standby; +} options; + +static pcmk__output_t *out = NULL; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +PCMK__OUTPUT_ARGS("features") +static int +pacemakerd_features(pcmk__output_t *out, va_list args) { + out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, + BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("features") +static int +pacemakerd_features_xml(pcmk__output_t *out, va_list args) { + gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0); + + pcmk__output_xml_create_parent(out, "pacemakerd", + "version", PACEMAKER_VERSION, + "build", BUILD_VERSION, + "feature_set", CRM_FEATURE_SET, + NULL); + out->begin_list(out, NULL, NULL, "features"); + + for (char **s = feature_list; *s != NULL; s++) { + pcmk__output_create_xml_text_node(out, "feature", *s); + } + + out->end_list(out); + + g_strfreev(feature_list); + return pcmk_rc_ok; +} + +static pcmk__message_entry_t fmt_functions[] = { + { "features", "default", pacemakerd_features }, + { "features", "xml", pacemakerd_features_xml }, + + { NULL, NULL, NULL } +}; + +static gboolean +pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { + return TRUE; +} + +static gboolean +standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { + options.standby = TRUE; + pcmk__set_env_option("node_start_state", "standby"); + return TRUE; +} + +static GOptionEntry entries[] = { + { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features, + "Display full version and list of features Pacemaker was built with", + NULL }, + { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground, + "(Ignored) Pacemaker always runs in the foreground", + NULL }, + { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb, + "(Ignored) Daemon pid file location", + "FILE" }, + { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown, + "Instruct Pacemaker to shut down on this machine", + NULL }, + { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb, + "Start node in standby state", + NULL }, + + { NULL } +}; + +static void +pcmk_ignore(int nsig) +{ + crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); +} + +static void +pcmk_sigquit(int nsig) +{ + pcmk__panic(__func__); +} + +static void +mcp_chown(const char *path, uid_t uid, gid_t gid) +{ + int rc = chown(path, uid, gid); + + if (rc < 0) { + crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", + path, CRM_DAEMON_USER, gid, pcmk_rc_str(errno)); + } +} + +static void +create_pcmk_dirs(void) +{ + uid_t pcmk_uid = 0; + gid_t pcmk_gid = 0; + + const char *dirs[] = { + CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files + CRM_CORE_DIR, // core files + CRM_BLACKBOX_DIR, // blackbox dumps + PE_STATE_DIR, // scheduler inputs + CRM_CONFIG_DIR, // the Cluster Information Base (CIB) + // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it + NULL + }; + + if
(pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) { + crm_err("Cluster user %s does not exist, aborting Pacemaker startup", + CRM_DAEMON_USER); + crm_exit(CRM_EX_NOUSER); + } + + // Used by some resource agents + if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) { + crm_warn("Could not create directory " CRM_STATE_DIR ": %s", + pcmk_rc_str(errno)); + } else { + mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); + } + + for (int i = 0; dirs[i] != NULL; ++i) { + int rc = pcmk__build_path(dirs[i], 0750); + + if (rc != pcmk_rc_ok) { + crm_warn("Could not create directory %s: %s", + dirs[i], pcmk_rc_str(rc)); + } else { + mcp_chown(dirs[i], pcmk_uid, pcmk_gid); + } + } +} + +static void +remove_core_file_limit(void) +{ + struct rlimit cores; + + // Get current limits + if (getrlimit(RLIMIT_CORE, &cores) < 0) { + crm_notice("Unable to check system core file limits " + "(consider ensuring the size is unlimited): %s", + strerror(errno)); + return; + } + + // Check whether core dumps are disabled + if (cores.rlim_max == 0) { + if (geteuid() != 0) { // Yes, and there's nothing we can do about it + crm_notice("Core dumps are disabled (consider enabling them)"); + return; + } + cores.rlim_max = RLIM_INFINITY; // Yes, but we're root, so enable them + } + + // Raise soft limit to hard limit (if not already done) + if (cores.rlim_cur != cores.rlim_max) { + cores.rlim_cur = cores.rlim_max; + if (setrlimit(RLIMIT_CORE, &cores) < 0) { + crm_notice("Unable to raise system limit on core file size " + "(consider doing so manually): %s", + strerror(errno)); + return; + } + } + + if (cores.rlim_cur == RLIM_INFINITY) { + crm_trace("Core file size is unlimited"); + } else { + crm_trace("Core file size is limited to %llu bytes", + (unsigned long long) cores.rlim_cur); + } +} + +static void +pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) +{ + pcmk_pacemakerd_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_reply: + break; + + default: + return; + } + + if (status != CRM_EX_OK) { + out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status)); + return; + } + + if (reply->reply_type != pcmk_pacemakerd_reply_shutdown) { + out->err(out, "Unknown reply type %d from pacemakerd", + reply->reply_type); + } +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + crm_exit_t exit_code = CRM_EX_OK; + + GError *error = NULL; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); + GOptionContext *context = build_arg_context(args, &output_group); + + bool old_instance_connected = false; + + pcmk_ipc_api_t *old_instance = NULL; + qb_ipcs_service_t *ipcs = NULL; + + subdaemon_check_progress = time(NULL); + + setenv("LC_ALL", "C", 1); // Ensure logs are in a common language + + crm_log_preinit(NULL, argc, argv); + mainloop_add_signal(SIGHUP, pcmk_ignore); + mainloop_add_signal(SIGQUIT, pcmk_sigquit); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, 
args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) { + exit_code = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + pcmk__force_args(context, &error, "%s --xml-simple-list", g_get_prgname()); + + pcmk__register_messages(out, fmt_functions); + + if (options.features) { + out->message(out, "features"); + exit_code = CRM_EX_OK; + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + pcmk__set_env_option("mcp", "true"); + + if (options.shutdown) { + pcmk__cli_init_logging("pacemakerd", args->verbosity); + } else { + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + } + + crm_debug("Checking for existing Pacemaker instance"); + + rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd); + if (old_instance == NULL) { + out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; + } + + pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL); + rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + old_instance_connected = pcmk_ipc_is_connected(old_instance); + + if (options.shutdown) { + if (old_instance_connected) { + rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name); + pcmk_dispatch_ipc(old_instance); + + exit_code = pcmk_rc2exitc(rc); + + if (exit_code != CRM_EX_OK) { + pcmk_free_ipc_api(old_instance); + goto done; + } + + /* We get the ACK immediately, and the response right after that, + * but it might take a while for pacemakerd to get around to + * shutting down. Wait for that to happen (with 30-minute timeout). + */ + for (int i = 0; i < 900; i++) { + if (!pcmk_ipc_is_connected(old_instance)) { + exit_code = CRM_EX_OK; + pcmk_free_ipc_api(old_instance); + goto done; + } + + sleep(2); + } + + exit_code = CRM_EX_TIMEOUT; + pcmk_free_ipc_api(old_instance); + goto done; + + } else { + out->err(out, "Could not request shutdown " + "of existing Pacemaker instance: %s", pcmk_rc_str(rc)); + pcmk_free_ipc_api(old_instance); + exit_code = CRM_EX_DISCONNECT; + goto done; + } + + } else if (old_instance_connected) { + pcmk_free_ipc_api(old_instance); + crm_err("Aborting start-up because active Pacemaker instance found"); + exit_code = CRM_EX_FATAL; + goto done; + } + + pcmk_free_ipc_api(old_instance); + + /* Don't allow any accidental output after this point. 
*/ + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + out = NULL; + } + +#ifdef SUPPORT_COROSYNC + if (mcp_read_config() == FALSE) { + crm_exit(CRM_EX_UNAVAILABLE); + } +#endif + + // OCF shell functions and cluster-glue need facility under different name + { + const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY); + + if (!pcmk__str_eq(facility, PCMK__VALUE_NONE, + pcmk__str_casei|pcmk__str_null_matches)) { + setenv("HA_LOGFACILITY", facility, 1); + } + } + + crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", + PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + mainloop = g_main_loop_new(NULL, FALSE); + + remove_core_file_limit(); + create_pcmk_dirs(); + pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks); + +#ifdef SUPPORT_COROSYNC + /* Allows us to block shutdown */ + if (!cluster_connect_cfg()) { + exit_code = CRM_EX_PROTOCOL; + goto done; + } +#endif + + if (pcmk__locate_sbd() > 0) { + setenv("PCMK_watchdog", "true", 1); + running_with_sbd = TRUE; + } else { + setenv("PCMK_watchdog", "false", 1); + } + + switch (find_and_track_existing_processes()) { + case pcmk_rc_ok: + break; + case pcmk_rc_ipc_unauthorized: + exit_code = CRM_EX_CANTCREAT; + goto done; + default: + exit_code = CRM_EX_FATAL; + goto done; + }; + + mainloop_add_signal(SIGTERM, pcmk_shutdown); + mainloop_add_signal(SIGINT, pcmk_shutdown); + + if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Waiting for startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; + startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); + } else { + if (running_with_sbd) { + crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " + "by your SBD version) improve reliability of " + "interworking between SBD & pacemaker."); + } + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + init_children_processes(NULL); + } + + crm_notice("Pacemaker daemon successfully started and accepting connections"); + g_main_loop_run(mainloop); + + if (ipcs) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + ipcs = NULL; + } + + g_main_loop_unref(mainloop); +#ifdef SUPPORT_COROSYNC + cluster_disconnect_cfg(); +#endif + +done: + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(exit_code); +} diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h new file mode 100644 index 0000000..b2a6864 --- /dev/null +++ b/daemons/pacemakerd/pacemakerd.h @@ -0,0 +1,35 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include + +#include + +#define MAX_RESPAWN 100 + +extern GMainLoop *mainloop; +extern struct qb_ipcs_service_handlers mcp_ipc_callbacks; +extern const char *pacemakerd_state; +extern gboolean running_with_sbd; +extern unsigned int shutdown_complete_state_reported_to; +extern gboolean shutdown_complete_state_reported_client_closed; +extern crm_trigger_t *shutdown_trigger; +extern crm_trigger_t *startup_trigger; +extern time_t subdaemon_check_progress; + +gboolean mcp_read_config(void); + +gboolean cluster_connect_cfg(void); +void cluster_disconnect_cfg(void); +int find_and_track_existing_processes(void); +gboolean init_children_processes(void *user_data); +void restart_cluster_subdaemons(void); +void pcmk_shutdown(int nsig); +void pcmkd_shutdown_corosync(void); +bool pcmkd_corosync_connected(void); diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c new file mode 100644 index 0000000..2648756 --- /dev/null +++ b/daemons/pacemakerd/pcmkd_corosync.c @@ -0,0 +1,371 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include "pacemakerd.h" + +#include +#include /* for calls to stat() */ +#include /* For basename() and dirname() */ + +#include +#include /* For getpwnam() */ + +#include +#include +#include +#include + +#include +#include /* for crm_ipc_is_authentic_process */ +#include + +#include /* PCMK__SPECIAL_PID* */ + +static corosync_cfg_handle_t cfg_handle = 0; +static mainloop_timer_t *reconnect_timer = NULL; + +/* =::=::=::= CFG - Shutdown stuff =::=::=::= */ + +static void +cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) +{ + crm_info("Corosync wants to shut down: %s", + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional"); + + /* Never allow corosync to shut down while we're running */ + corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); +} + +static corosync_cfg_callbacks_t cfg_callbacks = { + .corosync_cfg_shutdown_callback = cfg_shutdown_callback, +}; + +static int +pcmk_cfg_dispatch(gpointer user_data) +{ + corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; + cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); + + if (rc != CS_OK) { + return -1; + } + return 0; +} + +static void +close_cfg(void) +{ + if (cfg_handle != 0) { +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a + * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully, + * it's not necessary since we exit immediately after this. + */ +#endif + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + } +} + +static gboolean +cluster_reconnect_cb(gpointer data) +{ + if (cluster_connect_cfg()) { + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + crm_notice("Cluster reconnect succeeded"); + mcp_read_config(); + restart_cluster_subdaemons(); + return G_SOURCE_REMOVE; + } else { + crm_info("Cluster reconnect failed " + "(connection will be reattempted once per second)"); + } + /* + * In theory this will continue forever. In practice the CIB connection from + * attrd will time out and shut down Pacemaker when it gets bored.
+ */ + return G_SOURCE_CONTINUE; +} + + +static void +cfg_connection_destroy(gpointer user_data) +{ + crm_warn("Lost connection to cluster layer " + "(connection will be reattempted once per second)"); + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL); + mainloop_timer_start(reconnect_timer); +} + +void +cluster_disconnect_cfg(void) +{ + close_cfg(); + if (reconnect_timer != NULL) { + /* The mainloop should be gone by this point, so this isn't necessary, + * but cleaning up memory should make valgrind happier. + */ + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + } +} + +#define cs_repeat(counter, max, code) do { \ + code; \ + if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ + counter++; \ + crm_debug("Retrying Corosync operation after %ds", counter); \ + sleep(counter); \ + } else { \ + break; \ + } \ + } while(counter < max) + +gboolean +cluster_connect_cfg(void) +{ + cs_error_t rc; + int fd = -1, retries = 0, rv; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + uint32_t nodeid; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, + .destroy = cfg_connection_destroy, + }; + + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } + + /* CFG provider run as root (in given user namespace, anyway)? */ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CFG provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + goto bail; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CFG provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); + if (rc != CS_OK) { + crm_crit("Could not get local node ID from Corosync: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + goto bail; + } + crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid); + +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0)); + if (rc != CS_OK) { + crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } +#endif + + mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); + return TRUE; + + bail: + corosync_cfg_finalize(cfg_handle); + return FALSE; +} + +void +pcmkd_shutdown_corosync(void) +{ + cs_error_t rc; + + if (cfg_handle == 0) { + crm_warn("Unable to shut down Corosync: No connection"); + return; + } + crm_info("Asking Corosync to shut down"); + rc = corosync_cfg_try_shutdown(cfg_handle, + COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); + if (rc == CS_OK) { + close_cfg(); + } else { + crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + } +} + +bool +pcmkd_corosync_connected(void) +{ + cpg_handle_t local_handle = 0; + cpg_model_v1_data_t cpg_model_info = 
{CPG_MODEL_V1, NULL, NULL, NULL, 0}; + int fd = -1; + + if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) { + return false; + } + + if (cpg_fd_get(local_handle, &fd) != CS_OK) { + return false; + } + + cpg_finalize(local_handle); + + return true; +} + +/* =::=::=::= Configuration =::=::=::= */ +static int +get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, + const char *fallback) +{ + int rc = 0, retries = 0; + + cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); + if (rc != CS_OK) { + crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); + pcmk__str_update(value, fallback); + } + crm_trace("%s: %s", key, *value); + return rc; +} + +gboolean +mcp_read_config(void) +{ + cs_error_t rc = CS_OK; + int retries = 0; + cmap_handle_t local_handle; + uint64_t config = 0; + int fd = -1; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + int rv; + enum cluster_type_e stack; + + // There can be only one possibility + do { + rc = pcmk__init_cmap(&local_handle); + if (rc != CS_OK) { + retries++; + crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) " + CRM_XS " rc=%d", cs_strerror(rc), retries, rc); + sleep(retries); + + } else { + break; + } + + } while (retries < 5); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CMAP: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + return FALSE; + } + + rc = cmap_fd_get(local_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + cmap_finalize(local_handle); + return FALSE; + } + + /* CMAP provider run as root (in given user namespace, anyway)? */ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CMAP provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + cmap_finalize(local_handle); + return FALSE; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CMAP provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + cmap_finalize(local_handle); + return FALSE; + } + + stack = get_cluster_type(); + if (stack != pcmk_cluster_corosync) { + crm_crit("Expected Corosync cluster layer but detected %s " + CRM_XS " stack=%d", name_for_cluster_type(stack), stack); + return FALSE; + } + + crm_info("Reading configuration for %s stack", + name_for_cluster_type(stack)); + pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync"); + pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync"); + + // If debug logging is not configured, check whether corosync has it + if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) { + char *debug_enabled = NULL; + + get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off"); + + if (crm_is_true(debug_enabled)) { + pcmk__set_env_option(PCMK__ENV_DEBUG, "1"); + if (get_crm_log_level() < LOG_DEBUG) { + set_crm_log_level(LOG_DEBUG); + } + + } else { + pcmk__set_env_option(PCMK__ENV_DEBUG, "0"); + } + + free(debug_enabled); + } + + if(local_handle){ + gid_t gid = 0; + if (pcmk_daemon_user(NULL, &gid) < 0) { + crm_warn("Could not authorize group with Corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); + + } else { + char key[PATH_MAX]; + snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); + rc = cmap_set_uint8(local_handle, key, 1); + if (rc 
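/* What the CMAP write in mcp_read_config() above achieves, for context: it is
 * the runtime equivalent of a corosync.conf stanza like
 *
 *     uidgid {
 *         gid: 189    # hypothetical numeric gid of Pacemaker's group
 *     }
 *
 * i.e. it authorizes members of the cluster group to open Corosync IPC
 * without being root, which subdaemons running as CRM_DAEMON_USER need.
 * A standalone sketch of the same call, assuming a connected cmap handle:
 *
 *     char key[PATH_MAX];
 *
 *     snprintf(key, sizeof(key), "uidgid.gid.%u", gid);
 *     rc = cmap_set_uint8(handle, key, 1);
 */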
!= CS_OK) { + crm_warn("Could not authorize group with Corosync: %s " CRM_XS + " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc); + } + } + } + cmap_finalize(local_handle); + + return TRUE; +} diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c new file mode 100644 index 0000000..7ed9899 --- /dev/null +++ b/daemons/pacemakerd/pcmkd_messages.c @@ -0,0 +1,278 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include "pacemakerd.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +static GHashTable *pcmkd_handlers = NULL; + +static xmlNode * +handle_node_cache_request(pcmk__request_t *request) +{ + crm_trace("Ignoring request from client %s to purge node " + "because peer cache is not used", + pcmk__client_name(request->ipc_client)); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_OK); + return NULL; +} + +static xmlNode * +handle_ping_request(pcmk__request_t *request) +{ + xmlNode *msg = request->xml; + + const char *value = NULL; + xmlNode *ping = NULL; + xmlNode *reply = NULL; + const char *from = crm_element_value(msg, F_CRM_SYS_FROM); + + /* Pinged for status */ + crm_trace("Pinged from " F_CRM_SYS_FROM "='%s' " F_CRM_ORIGIN "='%s'", + pcmk__s(from, ""), + pcmk__s(crm_element_value(msg, F_CRM_ORIGIN), "")); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state); + crm_xml_add_ll(ping, XML_ATTR_TSTAMP, + (long long) subdaemon_check_progress); + crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + reply = create_reply(msg, ping); + + free_xml(ping); + + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Failed building ping reply for client %s", + pcmk__client_name(request->ipc_client)); + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + /* just proceed state on sbd pinging us */ + if (from && strstr(from, "sbd")) { + if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) { + if (pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Shutdown-complete-state passed to SBD."); + } + + shutdown_complete_state_reported_to = request->ipc_client->pid; + + } else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) { + crm_notice("Received startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + mainloop_set_trigger(startup_trigger); + } + } + + return reply; +} + +static xmlNode * +handle_shutdown_request(pcmk__request_t *request) +{ + xmlNode *msg = request->xml; + + xmlNode *shutdown = NULL; + xmlNode *reply = NULL; + + /* Only allow privileged users (i.e. root or hacluster) to shut down + * Pacemaker from the command line (or direct IPC), so that other users + * are forced to go through the CIB and have ACLs applied. 
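 *
 * For context on the ping handler above: the <ping> reply it assembles
 * serializes roughly as follows (attribute names correspond to the
 * XML_PING_ATTR_* and XML_ATTR_TSTAMP constants; exact spellings are an
 * illustration, not captured output):
 *
 *     <ping crm_subsystem="pacemakerd"
 *           pacemakerd_state="running"
 *           crm-timestamp="1681714400"
 *           result="ok"/>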
+ */ + bool allowed = pcmk_is_set(request->ipc_client->flags, pcmk__client_privileged); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + shutdown = create_xml_node(NULL, XML_CIB_ATTR_SHUTDOWN); + + if (allowed) { + crm_notice("Shutting down in response to IPC request %s from %s", + crm_element_value(msg, F_CRM_REFERENCE), + crm_element_value(msg, F_CRM_ORIGIN)); + crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_OK); + } else { + crm_warn("Ignoring shutdown request from unprivileged client %s", + pcmk__client_name(request->ipc_client)); + crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_INSUFFICIENT_PRIV); + } + + reply = create_reply(msg, shutdown); + free_xml(shutdown); + + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Failed building shutdown reply for client %s", + pcmk__client_name(request->ipc_client)); + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + if (allowed) { + pcmk_shutdown(15); + } + + return reply; +} + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INVALID_PARAM); + + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown IPC request type '%s' (bug?)", + pcmk__client_name(request->ipc_client)); + return NULL; +} + +static void +pcmkd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { CRM_OP_RM_NODE_CACHE, handle_node_cache_request }, + { CRM_OP_PING, handle_ping_request }, + { CRM_OP_QUIT, handle_shutdown_request }, + { NULL, handle_unknown_request }, + }; + + pcmkd_handlers = pcmk__register_handlers(handlers); +} + +static int32_t +pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + crm_trace("Connection %p", c); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return 0; +} + +/* Error code means? */ +static int32_t +pcmk_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + crm_trace("Connection %p", c); + if (shutdown_complete_state_reported_to == client->pid) { + shutdown_complete_state_reported_client_closed = TRUE; + if (shutdown_trigger) { + mainloop_set_trigger(shutdown_trigger); + } + } + pcmk__free_client(client); + return 0; +} + +static void +pcmk_ipc_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p", c); + pcmk_ipc_closed(c); +} + +/* Exit code means? 
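 *
 * For context, the table loaded by pcmkd_register_handlers() above is a
 * GHashTable keyed by operation name, so the lookup inside
 * pcmk__process_request() in the dispatcher below amounts conceptually to
 * this sketch (the fallback registered under the NULL name is used when no
 * exact match exists):
 *
 *     xmlNode *(*handler)(pcmk__request_t *) =
 *         g_hash_table_lookup(pcmkd_handlers, request->op);
 *
 *     if (handler == NULL) {
 *         handler = handle_unknown_request;  // the NULL-keyed fallback
 *     }
 *     reply = handler(request);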
*/ +static int32_t +pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + xmlNode *msg = NULL; + pcmk__client_t *c = pcmk__find_client(qbc); + + CRM_CHECK(c != NULL, return 0); + + if (pcmkd_handlers == NULL) { + pcmkd_register_handlers(); + } + + msg = pcmk__client_data2xml(c, data, &id, &flags); + if (msg == NULL) { + pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + + } else { + char *log_msg = NULL; + const char *reason = NULL; + xmlNode *reply = NULL; + + pcmk__request_t request = { + .ipc_client = c, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = msg, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, F_CRM_TASK); + CRM_CHECK(request.op != NULL, return 0); + + reply = pcmk__process_request(&request, pcmkd_handlers); + + if (reply != NULL) { + pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); + free_xml(reply); + } + + reason = request.result.exit_reason; + + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request.op, pcmk__request_origin_type(&request), + pcmk__request_origin(&request), + pcmk_exec_status_str(request.result.execution_status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? "" : ")"); + + if (!pcmk__result_ok(&request.result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(&request); + } + + free_xml(msg); + return 0; +} + +struct qb_ipcs_service_handlers mcp_ipc_callbacks = { + .connection_accept = pcmk_ipc_accept, + .connection_created = NULL, + .msg_process = pcmk_ipc_dispatch, + .connection_closed = pcmk_ipc_closed, + .connection_destroyed = pcmk_ipc_destroy +}; diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c new file mode 100644 index 0000000..3b08ecc --- /dev/null +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -0,0 +1,888 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
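 *
 * A note on how the mcp_ipc_callbacks table defined at the end of
 * pcmkd_messages.c is put into service: pacemakerd hands it to libqb when it
 * creates its IPC server.  A sketch of that wiring, assuming Pacemaker's
 * generic mainloop helper (the actual call site is in pacemakerd.c, outside
 * this hunk):
 *
 *     qb_ipcs_service_t *ipcs =
 *         mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE,
 *                                 &mcp_ipc_callbacks);
 *
 *     if (ipcs == NULL) {
 *         crm_err("Couldn't start pacemakerd IPC server");
 *     }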
+ */ + +#include +#include "pacemakerd.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct pcmk_child_s { + pid_t pid; + int respawn_count; + bool respawn; + const char *name; + const char *uid; + const char *command; + const char *endpoint; /* IPC server name */ + bool needs_cluster; + int check_count; + + /* Anything below here will be dynamically initialized */ + bool needs_retry; + bool active_before_startup; +} pcmk_child_t; + +#define PCMK_PROCESS_CHECK_INTERVAL 1 +#define PCMK_PROCESS_CHECK_RETRIES 5 +#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ + +/* Index into the array below */ +#define PCMK_CHILD_CONTROLD 5 + +static pcmk_child_t pcmk_children[] = { + { + 0, 0, true, "pacemaker-based", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO, + true + }, + { + 0, 0, true, "pacemaker-fenced", NULL, + CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng", + true + }, + { + 0, 0, true, "pacemaker-execd", NULL, + CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD, + false + }, + { + 0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD, + true + }, + { + 0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE, + false + }, + { + 0, 0, true, "pacemaker-controld", CRM_DAEMON_USER, + CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD, + true + }, +}; + +static char *opts_default[] = { NULL, NULL }; +static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; + +crm_trigger_t *shutdown_trigger = NULL; +crm_trigger_t *startup_trigger = NULL; +time_t subdaemon_check_progress = 0; + +// Whether we need root group access to talk to cluster layer +static bool need_root_group = true; + +/* When contacted via pacemakerd-api by a client having sbd in + * the name we assume it is sbd-daemon which wants to know + * if pacemakerd shutdown gracefully. + * Thus when everything is shutdown properly pacemakerd + * waits till it has reported the graceful completion of + * shutdown to sbd and just when sbd-client closes the + * connection we can assume that the report has arrived + * properly so that pacemakerd can finally exit. + * Following two variables are used to track that handshake. 
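 *
 * The handshake, step by step (a sketch of the exchange; see
 * handle_ping_request() and pcmk_ipc_closed() in pcmkd_messages.c):
 *
 *     1. sbd sends CRM_OP_PING over the pacemakerd IPC API
 *     2. pacemakerd replies with the shutdown-complete state and records
 *        the inquirer: shutdown_complete_state_reported_to = client pid
 *     3. sbd closes its IPC connection
 *     4. pcmk_ipc_closed() sets
 *        shutdown_complete_state_reported_client_closed = TRUE and kicks
 *        shutdown_trigger, letting pcmk_shutdown_worker() finally exit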
+ */ +unsigned int shutdown_complete_state_reported_to = 0; +gboolean shutdown_complete_state_reported_client_closed = FALSE; + +/* state we report when asked via pacemakerd-api status-ping */ +const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; +gboolean running_with_sbd = FALSE; /* local copy */ + +GMainLoop *mainloop = NULL; + +static gboolean fatal_error = FALSE; + +static int child_liveness(pcmk_child_t *child); +static gboolean escalate_shutdown(gpointer data); +static int start_child(pcmk_child_t * child); +static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); +static void pcmk_process_exit(pcmk_child_t * child); +static gboolean pcmk_shutdown_worker(gpointer user_data); +static gboolean stop_child(pcmk_child_t * child, int signal); + +static bool +pcmkd_cluster_connected(void) +{ +#if SUPPORT_COROSYNC + return pcmkd_corosync_connected(); +#else + return true; +#endif +} + +static gboolean +check_next_subdaemon(gpointer user_data) +{ + static int next_child = 0; + int rc = child_liveness(&pcmk_children[next_child]); + + crm_trace("Checked %s[%lld]: %s (%d)", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid), + pcmk_rc_str(rc), rc); + + switch (rc) { + case pcmk_rc_ok: + pcmk_children[next_child].check_count = 0; + subdaemon_check_progress = time(NULL); + break; + case pcmk_rc_ipc_pid_only: // This case: it was previously OK + pcmk_children[next_child].check_count++; + if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) { + crm_err("%s[%lld] is unresponsive to ipc after %d tries but " + "we found the pid so have it killed that we can restart", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid), + pcmk_children[next_child].check_count); + stop_child(&pcmk_children[next_child], SIGKILL); + if (pcmk_children[next_child].respawn) { + /* as long as the respawn-limit isn't reached + give it another round of check retries + */ + pcmk_children[next_child].check_count = 0; + } + } else { + crm_notice("%s[%lld] is unresponsive to ipc after %d tries", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid), + pcmk_children[next_child].check_count); + if (pcmk_children[next_child].respawn) { + /* as long as the respawn-limit isn't reached + and we haven't run out of connect retries + we account this as progress we are willing + to tell to sbd + */ + subdaemon_check_progress = time(NULL); + } + } + /* go to the next child and see if + we can make progress there + */ + break; + case pcmk_rc_ipc_unresponsive: + if (!pcmk_children[next_child].respawn) { + /* if a subdaemon is down and we don't want it + to be restarted this is a success during + shutdown. if it isn't restarted anymore + due to MAX_RESPAWN it is + rather no success. 
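 *
 * Timing, worked through: this callback runs every
 * PCMK_PROCESS_CHECK_INTERVAL (1s) and probes exactly one of the six
 * children per tick, so each child is revisited roughly every 6 seconds.
 * A child that answers by PID but not IPC is killed for restart only after
 * PCMK_PROCESS_CHECK_RETRIES (5) failed probes of that same child -- on the
 * order of 30 seconds of unresponsiveness, not 5.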
+ */ + if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) { + subdaemon_check_progress = time(NULL); + } + } + if (!pcmk_children[next_child].active_before_startup) { + crm_trace("found %s[%lld] missing - signal-handler " + "will take care of it", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + break; + } + if (pcmk_children[next_child].respawn) { + crm_err("%s[%lld] terminated", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + } else { + /* orderly shutdown */ + crm_notice("%s[%lld] terminated", + pcmk_children[next_child].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[next_child].pid)); + } + pcmk_process_exit(&(pcmk_children[next_child])); + break; + default: + crm_exit(CRM_EX_FATAL); + break; /* static analysis/noreturn */ + } + + next_child++; + if (next_child >= PCMK__NELEM(pcmk_children)) { + next_child = 0; + } + + return G_SOURCE_CONTINUE; +} + +static gboolean +escalate_shutdown(gpointer data) +{ + pcmk_child_t *child = data; + + if (child->pid == PCMK__SPECIAL_PID) { + pcmk_process_exit(child); + + } else if (child->pid != 0) { + /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ + crm_err("Child %s not terminating in a timely manner, forcing", child->name); + stop_child(child, SIGSEGV); + } + return FALSE; +} + +static void +pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) +{ + pcmk_child_t *child = mainloop_child_userdata(p); + const char *name = mainloop_child_name(p); + + if (signo) { + do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), + "%s[%d] terminated with signal %d (%s)%s", + name, pid, signo, strsignal(signo), + (core? 
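/* Why SIGSEGV in escalate_shutdown() above: stop_child() would ordinarily be
 * escalated from SIGTERM to SIGKILL, but SIGSEGV makes the stuck child dump
 * core, preserving evidence of what it was doing.  The timer is armed from
 * pcmk_shutdown_worker() below; the pattern in sketch form:
 *
 *     stop_child(child, SIGTERM);                 // polite request first
 *     g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,   // 180000 ms = 3 minutes
 *                   escalate_shutdown, child);    // then force a core dump
 */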
" and dumped core" : "")); + + } else { + switch(exitcode) { + case CRM_EX_OK: + crm_info("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; + + case CRM_EX_FATAL: + crm_warn("Shutting cluster down because %s[%d] had fatal failure", + name, pid); + child->respawn = false; + fatal_error = TRUE; + pcmk_shutdown(SIGTERM); + break; + + case CRM_EX_PANIC: + crm_emerg("%s[%d] instructed the machine to reset", name, pid); + child->respawn = false; + fatal_error = TRUE; + pcmk__panic(__func__); + pcmk_shutdown(SIGTERM); + break; + + default: + crm_err("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; + } + } + + pcmk_process_exit(child); +} + +static void +pcmk_process_exit(pcmk_child_t * child) +{ + child->pid = 0; + child->active_before_startup = false; + child->check_count = 0; + + child->respawn_count += 1; + if (child->respawn_count > MAX_RESPAWN) { + crm_err("Child respawn count exceeded by %s", child->name); + child->respawn = false; + } + + if (shutdown_trigger) { + /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ + mainloop_set_trigger(shutdown_trigger); + + } else if (!child->respawn) { + /* nothing to do */ + + } else if (crm_is_true(getenv("PCMK_fail_fast"))) { + crm_err("Rebooting system because of %s", child->name); + pcmk__panic(__func__); + + } else if (child_liveness(child) == pcmk_rc_ok) { + crm_warn("One-off suppressing strict respawning of a child process %s," + " appears alright per %s IPC end-point", + child->name, child->endpoint); + + } else if (child->needs_cluster && !pcmkd_cluster_connected()) { + crm_notice("Not respawning %s subdaemon until cluster returns", + child->name); + child->needs_retry = true; + + } else { + crm_notice("Respawning %s subdaemon after unexpected exit", + child->name); + start_child(child); + } +} + +static gboolean +pcmk_shutdown_worker(gpointer user_data) +{ + static int phase = PCMK__NELEM(pcmk_children) - 1; + static time_t next_log = 0; + + if (phase == PCMK__NELEM(pcmk_children) - 1) { + crm_notice("Shutting down Pacemaker"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; + } + + for (; phase >= 0; phase--) { + pcmk_child_t *child = &(pcmk_children[phase]); + + if (child->pid != 0) { + time_t now = time(NULL); + + if (child->respawn) { + if (child->pid == PCMK__SPECIAL_PID) { + crm_warn("The process behind %s IPC cannot be" + " terminated, so either wait the graceful" + " period of %ld s for its native termination" + " if it vitally depends on some other daemons" + " going down in a controlled way already," + " or locate and kill the correct %s process" + " on your own; set PCMK_fail_fast=1 to avoid" + " this altogether next time around", + child->name, (long) SHUTDOWN_ESCALATION_PERIOD, + child->command); + } + next_log = now + 30; + child->respawn = false; + stop_child(child, SIGTERM); + if (phase < PCMK_CHILD_CONTROLD) { + g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, + escalate_shutdown, child); + } + + } else if (now >= next_log) { + next_log = now + 30; + crm_notice("Still waiting for %s to terminate " + CRM_XS " pid=%lld", + child->name, (long long) child->pid); + } + return TRUE; + } + + /* cleanup */ + crm_debug("%s confirmed stopped", child->name); + child->pid = 0; + } + + crm_notice("Shutdown complete"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; + if (!fatal_error && running_with_sbd && + pcmk__get_sbd_sync_resource_startup() && + !shutdown_complete_state_reported_client_closed) { 
+ crm_notice("Waiting for SBD to pick up shutdown-complete-state."); + return TRUE; + } + + { + const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY); + if(delay) { + sync(); + pcmk__sleep_ms(crm_get_msec(delay)); + } + } + + g_main_loop_quit(mainloop); + + if (fatal_error) { + crm_notice("Shutting down and staying down after fatal error"); +#ifdef SUPPORT_COROSYNC + pcmkd_shutdown_corosync(); +#endif + crm_exit(CRM_EX_FATAL); + } + + return TRUE; +} + +/* TODO once libqb is taught to juggle with IPC end-points carried over as + bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) + it shall hand over these descriptors here if/once they are successfully + pre-opened in (presumably) child_liveness(), to avoid any remaining + room for races */ + // \return Standard Pacemaker return code +static int +start_child(pcmk_child_t * child) +{ + uid_t uid = 0; + gid_t gid = 0; + gboolean use_valgrind = FALSE; + gboolean use_callgrind = FALSE; + const char *env_valgrind = getenv("PCMK_valgrind_enabled"); + const char *env_callgrind = getenv("PCMK_callgrind_enabled"); + + child->active_before_startup = false; + child->check_count = 0; + + if (child->command == NULL) { + crm_info("Nothing to do for child \"%s\"", child->name); + return pcmk_rc_ok; + } + + if (env_callgrind != NULL && crm_is_true(env_callgrind)) { + use_callgrind = TRUE; + use_valgrind = TRUE; + + } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { + use_callgrind = TRUE; + use_valgrind = TRUE; + + } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { + use_valgrind = TRUE; + + } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { + use_valgrind = TRUE; + } + + if (use_valgrind && strlen(VALGRIND_BIN) == 0) { + crm_warn("Cannot enable valgrind for %s:" + " The location of the valgrind binary is unknown", child->name); + use_valgrind = FALSE; + } + + if (child->uid) { + if (crm_user_lookup(child->uid, &uid, &gid) < 0) { + crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); + return EACCES; + } + crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); + } + + child->pid = fork(); + CRM_ASSERT(child->pid != -1); + + if (child->pid > 0) { + /* parent */ + mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); + + crm_info("Forked child %lld for process %s%s", + (long long) child->pid, child->name, + use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); + return pcmk_rc_ok; + + } else { + /* Start a new session */ + (void)setsid(); + + /* Setup the two alternate arg arrays */ + opts_vgrind[0] = strdup(VALGRIND_BIN); + if (use_callgrind) { + opts_vgrind[1] = strdup("--tool=callgrind"); + opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); + opts_vgrind[3] = strdup(child->command); + opts_vgrind[4] = NULL; + } else { + opts_vgrind[1] = strdup(child->command); + opts_vgrind[2] = NULL; + opts_vgrind[3] = NULL; + opts_vgrind[4] = NULL; + } + opts_default[0] = strdup(child->command); + + if(gid) { + // Drop root group access if not needed + if (!need_root_group && (setgid(gid) < 0)) { + crm_warn("Could not set group to %d: %s", gid, strerror(errno)); + } + + /* Initialize supplementary groups to only those always granted to + * the user, plus haclient (so we can access IPC). 
+ */ + if (initgroups(child->uid, gid) < 0) { + crm_err("Cannot initialize groups for %s: %s (%d)", + child->uid, pcmk_rc_str(errno), errno); + } + } + + if (uid && setuid(uid) < 0) { + crm_warn("Could not set user to %s (id %d): %s", + child->uid, uid, strerror(errno)); + } + + pcmk__close_fds_in_child(true); + + pcmk__open_devnull(O_RDONLY); // stdin (fd 0) + pcmk__open_devnull(O_WRONLY); // stdout (fd 1) + pcmk__open_devnull(O_WRONLY); // stderr (fd 2) + + if (use_valgrind) { + (void)execvp(VALGRIND_BIN, opts_vgrind); + } else { + (void)execvp(child->command, opts_default); + } + crm_crit("Could not execute %s: %s", child->command, strerror(errno)); + crm_exit(CRM_EX_FATAL); + } + return pcmk_rc_ok; /* never reached */ +} + +/*! + * \internal + * \brief Check the liveness of the child based on IPC name and PID if tracked + * + * \param[in,out] child Child tracked data + * + * \return Standard Pacemaker return code + * + * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive + * indicating that no trace of IPC liveness was detected, + * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by + * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that + * the child is up by PID but not IPC end-point (possibly starting). + * \note This function doesn't modify any of \p child members but \c pid, + * and is not actively toying with processes as such but invoking + * \c stop_child in one particular case (there's for some reason + * a different authentic holder of the IPC end-point). + */ +static int +child_liveness(pcmk_child_t *child) +{ + uid_t cl_uid = 0; + gid_t cl_gid = 0; + const uid_t root_uid = 0; + const gid_t root_gid = 0; + const uid_t *ref_uid; + const gid_t *ref_gid; + int rc = pcmk_rc_ipc_unresponsive; + pid_t ipc_pid = 0; + + if (child->endpoint == NULL + && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { + crm_err("Cannot track child %s for missing both API end-point and PID", + child->name); + rc = EINVAL; // Misuse of function when child is not trackable + + } else if (child->endpoint != NULL) { + int legacy_rc = pcmk_ok; + + if (child->uid == NULL) { + ref_uid = &root_uid; + ref_gid = &root_gid; + } else { + ref_uid = &cl_uid; + ref_gid = &cl_gid; + legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); + } + + if (legacy_rc < 0) { + rc = pcmk_legacy2rc(legacy_rc); + crm_err("Could not find user and group IDs for user %s: %s " + CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); + } else { + rc = pcmk__ipc_is_authentic_process_active(child->endpoint, + *ref_uid, *ref_gid, + &ipc_pid); + if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { + if (child->pid <= 0) { + /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this + * initializes a new child. If rc is + * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will + * investigate further. + */ + child->pid = ipc_pid; + } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { + /* An unexpected (but authorized) process is responding to + * IPC. Investigate further. + */ + rc = pcmk_rc_ipc_unresponsive; + } + } + } + } + + if (rc == pcmk_rc_ipc_unresponsive) { + /* If we get here, a child without IPC is being tracked, no IPC liveness + * has been detected, or IPC liveness has been detected with an + * unexpected (but authorized) process. This is safe on FreeBSD since + * the only change possible from a proper child's PID into "special" PID + * of 1 behind more loosely related process. 
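 *
 * For reference, the probe underneath pcmk__pid_active() is essentially the
 * classic kill() with signal 0 (a sketch; the real helper additionally
 * compares the process name where procfs allows it):
 *
 *     if (kill(pid, 0) == 0) {
 *         // process exists and we may signal it
 *     } else if (errno == ESRCH) {
 *         // no such process
 *     } else if (errno == EPERM) {
 *         // exists, but belongs to someone we cannot signal
 *     }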
+ */ + int ret = pcmk__pid_active(child->pid, child->name); + + if (ipc_pid && ((ret != pcmk_rc_ok) + || ipc_pid == PCMK__SPECIAL_PID + || (pcmk__pid_active(ipc_pid, + child->name) == pcmk_rc_ok))) { + /* An unexpected (but authorized) process was detected at the IPC + * endpoint, and either it is active, or the child we're tracking is + * not. + */ + + if (ret == pcmk_rc_ok) { + /* The child we're tracking is active. Kill it, and adopt the + * detected process. This assumes that our children don't fork + * (thus getting a different PID owning the IPC), but rather the + * tracking got out of sync because of some means external to + * Pacemaker, and adopting the detected process is better than + * killing it and possibly having to spawn a new child. + */ + /* not possessing IPC, afterall (what about corosync CPG?) */ + stop_child(child, SIGKILL); + } + rc = pcmk_rc_ok; + child->pid = ipc_pid; + } else if (ret == pcmk_rc_ok) { + // Our tracked child's PID was found active, but not its IPC + rc = pcmk_rc_ipc_pid_only; + } else if ((child->pid == 0) && (ret == EINVAL)) { + // FreeBSD can return EINVAL + rc = pcmk_rc_ipc_unresponsive; + } else { + switch (ret) { + case EACCES: + rc = pcmk_rc_ipc_unauthorized; + break; + case ESRCH: + rc = pcmk_rc_ipc_unresponsive; + break; + default: + rc = ret; + break; + } + } + } + return rc; +} + +/*! + * \internal + * \brief Initial one-off check of the pre-existing "child" processes + * + * With "child" process, we mean the subdaemon that defines an API end-point + * (all of them do as of the comment) -- the possible complement is skipped + * as it is deemed it has no such shared resources to cause conflicts about, + * hence it can presumably be started anew without hesitation. + * If that won't hold true in the future, the concept of a shared resource + * will have to be generalized beyond the API end-point. + * + * For boundary cases that the "child" is still starting (IPC end-point is yet + * to be witnessed), or more rarely (practically FreeBSD only), when there's + * a pre-existing "untrackable" authentic process, we give the situation some + * time to possibly unfold in the right direction, meaning that said socket + * will appear or the unattainable process will disappear per the observable + * IPC, respectively. + * + * \return Standard Pacemaker return code + * + * \note Since this gets run at the very start, \c respawn_count fields + * for particular children get temporarily overloaded with "rounds + * of waiting" tracking, restored once we are about to finish with + * success (i.e. returning value >=0) and will remain unrestored + * otherwise. One way to suppress liveness detection logic for + * particular child is to set the said value to a negative number. + */ +#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ +int +find_and_track_existing_processes(void) +{ + bool wait_in_progress; + int rc; + size_t i, rounds; + + for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { + wait_in_progress = false; + for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + + if ((pcmk_children[i].endpoint == NULL) + || (pcmk_children[i].respawn_count < 0)) { + continue; + } + + rc = child_liveness(&pcmk_children[i]); + if (rc == pcmk_rc_ipc_unresponsive) { + /* As a speculation, don't give up if there are more rounds to + * come for other reasons, but don't artificially wait just + * because of this, since we would preferably start ASAP. 
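 *
 * How each child_liveness() verdict is handled by this detection loop, in
 * summary (see the switch below):
 *
 *     pcmk_rc_ok               -> adopt the running process: mark it
 *                                 active_before_startup and stop probing it
 *                                 for the remaining detection rounds
 *     pcmk_rc_ipc_pid_only     -> a PID exists but its IPC end-point does
 *                                 not (yet): keep waiting, up to WAIT_TRIES
 *                                 rounds with 250ms pauses in between
 *     pcmk_rc_ipc_unresponsive -> no trace found; the child will simply be
 *                                 started fresh by init_children_processes()
 *     anything else            -> treated as fatal; the whole scan bails out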
+ */ + continue; + } + + pcmk_children[i].respawn_count = rounds; + switch (rc) { + case pcmk_rc_ok: + if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { + if (crm_is_true(getenv("PCMK_fail_fast"))) { + crm_crit("Cannot reliably track pre-existing" + " authentic process behind %s IPC on this" + " platform and PCMK_fail_fast requested", + pcmk_children[i].endpoint); + return EOPNOTSUPP; + } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { + crm_notice("Assuming pre-existing authentic, though" + " on this platform untrackable, process" + " behind %s IPC is stable (was in %d" + " previous samples) so rather than" + " bailing out (PCMK_fail_fast not" + " requested), we just switch to a less" + " optimal IPC liveness monitoring" + " (not very suitable for heavy load)", + pcmk_children[i].name, WAIT_TRIES - 1); + crm_warn("The process behind %s IPC cannot be" + " terminated, so the overall shutdown" + " will get delayed implicitly (%ld s)," + " which serves as a graceful period for" + " its native termination if it vitally" + " depends on some other daemons going" + " down in a controlled way already", + pcmk_children[i].name, + (long) SHUTDOWN_ESCALATION_PERIOD); + } else { + wait_in_progress = true; + crm_warn("Cannot reliably track pre-existing" + " authentic process behind %s IPC on this" + " platform, can still disappear in %d" + " attempt(s)", pcmk_children[i].endpoint, + WAIT_TRIES - pcmk_children[i].respawn_count); + continue; + } + } + crm_notice("Tracking existing %s process (pid=%lld)", + pcmk_children[i].name, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid)); + pcmk_children[i].respawn_count = -1; /* 0~keep watching */ + pcmk_children[i].active_before_startup = true; + break; + case pcmk_rc_ipc_pid_only: + if (pcmk_children[i].respawn_count == WAIT_TRIES) { + crm_crit("%s IPC end-point for existing authentic" + " process %lld did not (re)appear", + pcmk_children[i].endpoint, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid)); + return rc; + } + wait_in_progress = true; + crm_warn("Cannot find %s IPC end-point for existing" + " authentic process %lld, can still (re)appear" + " in %d attempts (?)", + pcmk_children[i].endpoint, + (long long) PCMK__SPECIAL_PID_AS_0( + pcmk_children[i].pid), + WAIT_TRIES - pcmk_children[i].respawn_count); + continue; + default: + crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d", + pcmk_children[i].name, pcmk_rc_str(rc), rc); + return rc; + } + } + if (!wait_in_progress) { + break; + } + pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen + } + for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + pcmk_children[i].respawn_count = 0; /* restore pristine state */ + } + + g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, + NULL); + return pcmk_rc_ok; +} + +gboolean +init_children_processes(void *user_data) +{ + if (is_corosync_cluster()) { + /* Corosync clusters can drop root group access, because we set + * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect + * to corosync. + */ + need_root_group = false; + } + + /* start any children that have not been detected */ + for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { + if (pcmk_children[i].pid != 0) { + /* we are already tracking it */ + continue; + } + + start_child(&(pcmk_children[i])); + } + + /* From this point on, any daemons being started will be due to + * respawning rather than node start. 
+ * + * This may be useful for the daemons to know + */ + setenv("PCMK_respawned", "true", 1); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; + return TRUE; +} + +void +pcmk_shutdown(int nsig) +{ + if (shutdown_trigger == NULL) { + shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); + } + mainloop_set_trigger(shutdown_trigger); +} + +void +restart_cluster_subdaemons(void) +{ + for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { + if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) { + continue; + } + + crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name); + if (start_child(&pcmk_children[i])) { + pcmk_children[i].needs_retry = false; + } + } +} + +static gboolean +stop_child(pcmk_child_t * child, int signal) +{ + if (signal == 0) { + signal = SIGTERM; + } + + /* why to skip PID of 1? + - FreeBSD ~ how untrackable process behind IPC is masqueraded as + - elsewhere: how "init" task is designated; in particular, in systemd + arrangement of socket-based activation, this is pretty real */ + if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { + crm_debug("Nothing to do for child \"%s\" (process %lld)", + child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + return TRUE; + } + + if (child->pid <= 0) { + crm_trace("Client %s not running", child->name); + return TRUE; + } + + errno = 0; + if (kill(child->pid, signal) == 0) { + crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld", + child->name, signal, (long long) child->pid); + + } else { + crm_err("Could not stop %s (process %lld) with signal %d: %s", + child->name, (long long) child->pid, signal, strerror(errno)); + } + + return TRUE; +} + diff --git a/daemons/schedulerd/Makefile.am b/daemons/schedulerd/Makefile.am new file mode 100644 index 0000000..57e819b --- /dev/null +++ b/daemons/schedulerd/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright 2004-2021 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
+# + +include $(top_srcdir)/mk/common.mk +include $(top_srcdir)/mk/man.mk + +AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) + +halibdir = $(CRM_DAEMON_DIR) + +## binary progs + +halib_PROGRAMS = pacemaker-schedulerd + +if BUILD_XML_HELP +man7_MANS = pacemaker-schedulerd.7 +endif + +## SOURCES + +noinst_HEADERS = pacemaker-schedulerd.h + +pacemaker_schedulerd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_schedulerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) +pacemaker_schedulerd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/lib/pacemaker/libpacemaker.la +# libcib for get_object_root() +pacemaker_schedulerd_SOURCES = pacemaker-schedulerd.c +pacemaker_schedulerd_SOURCES += schedulerd_messages.c + +install-exec-local: + $(INSTALL) -d -m 750 $(DESTDIR)/$(PE_STATE_DIR) + -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/$(PE_STATE_DIR) + +if BUILD_LEGACY_LINKS +install-exec-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine && $(LN_S) pacemaker-schedulerd pengine + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine +endif + +uninstall-local: + -rmdir $(DESTDIR)/$(PE_STATE_DIR) + +CLEANFILES = $(man7_MANS) diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c new file mode 100644 index 0000000..3f2a3e8 --- /dev/null +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -0,0 +1,181 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "pacemaker-schedulerd.h" + +#define SUMMARY "pacemaker-schedulerd - daemon for calculating a Pacemaker cluster's response to events" + +struct { + gchar **remainder; +} options; + +pcmk__output_t *logger_out = NULL; +pcmk__output_t *out = NULL; + +static GMainLoop *mainloop = NULL; +static qb_ipcs_service_t *ipcs = NULL; +static crm_exit_t exit_code = CRM_EX_OK; + +pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +void pengine_shutdown(int nsig); + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + GOptionEntry extra_prog_entries[] = { + { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder, + NULL, + NULL }, + + { NULL } + }; + + context = pcmk__build_arg_context(args, "text (default), xml", group, + "[metadata]"); + pcmk__add_main_args(context, extra_prog_entries); + return context; +} + +int +main(int argc, char **argv) +{ + GError *error = NULL; + int rc = pcmk_rc_ok; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + + crm_log_preinit(NULL, argc, argv); + mainloop_add_signal(SIGTERM, pengine_shutdown); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) 
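/* For context: the formats[] table above registers the "text" and "xml"
 * renderers, and pcmk__output_new() instantiates whichever one the user
 * selected (text by default).  A minimal usage sketch of the same API, with
 * a NULL destination meaning stdout:
 *
 *     pcmk__output_t *out = NULL;
 *     int rc = pcmk__output_new(&out, "text", NULL, argv);
 *
 *     if (rc == pcmk_rc_ok) {
 *         out->info(out, "hello from %s", "pacemaker-schedulerd");
 *         out->finish(out, CRM_EX_OK, true, NULL);
 *         pcmk__output_free(out);
 *     }
 */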
{ + exit_code = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + pe__register_messages(out); + pcmk__register_lib_messages(out); + + if (options.remainder) { + if (g_strv_length(options.remainder) == 1 && + pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) { + pe_metadata(out); + goto done; + } else { + exit_code = CRM_EX_USAGE; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Unsupported extra command line parameters"); + goto done; + } + } + + if (args->version) { + out->version(out, false); + goto done; + } + + pcmk__cli_init_logging("pacemaker-schedulerd", args->verbosity); + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker scheduler"); + + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + exit_code = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "ERROR: Bad permissions on %s (see logs for details)", PE_STATE_DIR); + goto done; + } + + ipcs = pcmk__serve_schedulerd_ipc(&ipc_callbacks); + if (ipcs == NULL) { + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Failed to create pacemaker-schedulerd server: exiting and inhibiting respawn"); + exit_code = CRM_EX_FATAL; + goto done; + } + + if (pcmk__log_output_new(&logger_out) != pcmk_rc_ok) { + exit_code = CRM_EX_FATAL; + goto done; + } + pe__register_messages(logger_out); + pcmk__register_lib_messages(logger_out); + pcmk__output_set_log_level(logger_out, LOG_TRACE); + + /* Create the mainloop and run it... */ + mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker scheduler successfully started and accepting connections"); + g_main_loop_run(mainloop); + +done: + g_strfreev(options.remainder); + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(&error, out); + pengine_shutdown(0); +} + +void +pengine_shutdown(int nsig) +{ + if (ipcs != NULL) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + ipcs = NULL; + } + + if (logger_out != NULL) { + logger_out->finish(logger_out, exit_code, true, NULL); + pcmk__output_free(logger_out); + logger_out = NULL; + } + + if (out != NULL) { + out->finish(out, exit_code, true, NULL); + pcmk__output_free(out); + out = NULL; + } + + pcmk__unregister_formats(); + crm_exit(exit_code); +} diff --git a/daemons/schedulerd/pacemaker-schedulerd.h b/daemons/schedulerd/pacemaker-schedulerd.h new file mode 100644 index 0000000..cbb07e1 --- /dev/null +++ b/daemons/schedulerd/pacemaker-schedulerd.h @@ -0,0 +1,20 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#ifndef PCMK__PACEMAKER_SCHEDULERD__H +#define PCMK__PACEMAKER_SCHEDULERD__H + +#include +#include + +extern pcmk__output_t *logger_out; +extern pcmk__output_t *out; +extern struct qb_ipcs_service_handlers ipc_callbacks; + +#endif diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c new file mode 100644 index 0000000..1c124d2 --- /dev/null +++ b/daemons/schedulerd/schedulerd_messages.c @@ -0,0 +1,335 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
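 *
 * (A note on the two pcmk__output_t objects set up in main() above: "out"
 * renders responses in the user-selected format on the user's terminal,
 * while "logger_out" is a second sink registered at LOG_TRACE, so the
 * scheduler messages emitted while computing a transition go to the system
 * log instead of stdout.  The working set below carries "logger_out" in its
 * priv member for exactly that purpose.)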
+ * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "pacemaker-schedulerd.h" + +static GHashTable *schedulerd_handlers = NULL; + +static pe_working_set_t * +init_working_set(void) +{ + pe_working_set_t *data_set = pe_new_working_set(); + + CRM_ASSERT(data_set != NULL); + + crm_config_error = FALSE; + crm_config_warning = FALSE; + + was_processing_error = FALSE; + was_processing_warning = FALSE; + + data_set->priv = logger_out; + return data_set; +} + +static xmlNode * +handle_pecalc_request(pcmk__request_t *request) +{ + static struct series_s { + const char *name; + const char *param; + + /* Maximum number of inputs of this kind to save to disk. + * If -1, save all; if 0, save none. + */ + int wrap; + } series[] = { + { "pe-error", "pe-error-series-max", -1 }, + { "pe-warn", "pe-warn-series-max", 5000 }, + { "pe-input", "pe-input-series-max", 4000 }, + }; + + xmlNode *msg = request->xml; + xmlNode *xml_data = get_message_xml(msg, F_CRM_DATA); + + static char *last_digest = NULL; + static char *filename = NULL; + + unsigned int seq; + int series_id = 0; + int series_wrap = 0; + char *digest = NULL; + const char *value = NULL; + time_t execution_date = time(NULL); + xmlNode *converted = NULL; + xmlNode *reply = NULL; + bool is_repoke = false; + bool process = true; + pe_working_set_t *data_set = init_working_set(); + + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, + CRM_FEATURE_SET); + converted = copy_xml(xml_data); + if (!cli_config_update(&converted, NULL, TRUE)) { + data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); + crm_xml_add_int(data_set->graph, "transition_id", 0); + crm_xml_add_int(data_set->graph, "cluster-delay", 0); + process = false; + free(digest); + + } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) { + is_repoke = true; + free(digest); + + } else { + free(last_digest); + last_digest = digest; + } + + if (process) { + pcmk__schedule_actions(converted, + pe_flag_no_counts + |pe_flag_no_compat + |pe_flag_show_utilization, data_set); + } + + // Get appropriate index into series[] array + if (was_processing_error) { + series_id = 0; + } else if (was_processing_warning) { + series_id = 1; + } else { + series_id = 2; + } + + value = pe_pref(data_set->config_hash, series[series_id].param); + if ((value == NULL) + || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { + series_wrap = series[series_id].wrap; + } + + if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name, + &seq) != pcmk_rc_ok) { + // @TODO maybe handle errors better ... 
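/* How the input-series rotation above works, in sketch form: a counter is
 * kept alongside the saved inputs (the <series>.last file under
 * PE_STATE_DIR), each new input is written to <series>-<seq>.bz2, and the
 * counter is stored back so that, once it reaches the configured cap, old
 * files are overwritten rather than accumulating forever:
 *
 *     pcmk__read_series_sequence(PE_STATE_DIR, "pe-input", &seq);
 *     filename = pcmk__series_filename(PE_STATE_DIR, "pe-input", seq, true);
 *     // ... write the CIB input to filename ...
 *     pcmk__write_series_sequence(PE_STATE_DIR, "pe-input", ++seq, wrap);
 */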
+ seq = 0; + } + crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", + series[series_id].name, series_wrap, seq, value); + + data_set->input = NULL; + reply = create_reply(msg, data_set->graph); + + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Failed building ping reply for client %s", + pcmk__client_name(request->ipc_client)); + goto done; + } + + if (series_wrap == 0) { // Don't save any inputs of this kind + free(filename); + filename = NULL; + + } else if (!is_repoke) { // Input changed, save to disk + free(filename); + filename = pcmk__series_filename(PE_STATE_DIR, + series[series_id].name, seq, true); + } + + crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); + crm_xml_add_int(reply, PCMK__XA_GRAPH_ERRORS, was_processing_error); + crm_xml_add_int(reply, PCMK__XA_GRAPH_WARNINGS, was_processing_warning); + crm_xml_add_int(reply, PCMK__XA_CONFIG_ERRORS, crm_config_error); + crm_xml_add_int(reply, PCMK__XA_CONFIG_WARNINGS, crm_config_warning); + + pcmk__log_transition_summary(filename); + + if (series_wrap == 0) { + crm_debug("Not saving input to disk (disabled by configuration)"); + + } else if (is_repoke) { + crm_info("Input has not changed since last time, not saving to disk"); + + } else { + unlink(filename); + crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date); + write_xml_file(xml_data, filename, TRUE); + pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name, + ++seq, series_wrap); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + +done: + free_xml(converted); + pe_free_working_set(data_set); + + return reply; +} + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INVALID_PARAM); + + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown IPC request type '%s' (bug?)", + pcmk__client_name(request->ipc_client)); + return NULL; +} + +static xmlNode * +handle_hello_request(pcmk__request_t *request) +{ + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + "ack", NULL, CRM_EX_INDETERMINATE); + + crm_trace("Received IPC hello from %s", pcmk__client_name(request->ipc_client)); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +static void +schedulerd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { CRM_OP_HELLO, handle_hello_request }, + { CRM_OP_PECALC, handle_pecalc_request }, + { NULL, handle_unknown_request }, + }; + + schedulerd_handlers = pcmk__register_handlers(handlers); +} + +static int32_t +pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + crm_trace("Connection %p", c); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return 0; +} + +static int32_t +pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + xmlNode *msg = NULL; + pcmk__client_t *c = pcmk__find_client(qbc); + const char *sys_to = NULL; + + CRM_CHECK(c != NULL, return 0); + + if (schedulerd_handlers == NULL) { + schedulerd_register_handlers(); + } + + msg = pcmk__client_data2xml(c, data, &id, &flags); + if (msg == NULL) { + pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + } + + sys_to = crm_element_value(msg, F_CRM_SYS_TO); + + if (pcmk__str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), + XML_ATTR_RESPONSE, pcmk__str_none)) { + pcmk__ipc_send_ack(c, id, 
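/* The routing guard in this dispatcher, for context: the scheduler only
 * accepts requests addressed to CRM_SYSTEM_PENGINE; stray replies and
 * misrouted messages are acked (so the sender does not block waiting) and
 * then dropped.  In sketch form:
 *
 *     if (message is a response)            ack, then ignore
 *     else if (sys_to is not "pengine")     ack, then ignore
 *     else                                  dispatch via schedulerd_handlers
 */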
flags, "ack", NULL, CRM_EX_INDETERMINATE); + crm_info("Ignoring IPC reply from %s", pcmk__client_name(c)); + + } else if (!pcmk__str_eq(sys_to, CRM_SYSTEM_PENGINE, pcmk__str_none)) { + pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INDETERMINATE); + crm_info("Ignoring invalid IPC message: to '%s' not " + CRM_SYSTEM_PENGINE, pcmk__s(sys_to, "")); + + } else { + char *log_msg = NULL; + const char *reason = NULL; + xmlNode *reply = NULL; + + pcmk__request_t request = { + .ipc_client = c, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = msg, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, F_CRM_TASK); + CRM_CHECK(request.op != NULL, return 0); + + reply = pcmk__process_request(&request, schedulerd_handlers); + + if (reply != NULL) { + pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); + free_xml(reply); + } + + reason = request.result.exit_reason; + + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request.op, pcmk__request_origin_type(&request), + pcmk__request_origin(&request), + pcmk_exec_status_str(request.result.execution_status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? "" : ")"); + + if (!pcmk__result_ok(&request.result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(&request); + } + + free_xml(msg); + return 0; +} + +/* Error code means? */ +static int32_t +pe_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + crm_trace("Connection %p", c); + pcmk__free_client(client); + return 0; +} + +static void +pe_ipc_destroy(qb_ipcs_connection_t * c) +{ + crm_trace("Connection %p", c); + pe_ipc_closed(c); +} + +struct qb_ipcs_service_handlers ipc_callbacks = { + .connection_accept = pe_ipc_accept, + .connection_created = NULL, + .msg_process = pe_ipc_dispatch, + .connection_closed = pe_ipc_closed, + .connection_destroyed = pe_ipc_destroy +}; -- cgit v1.2.3