diff options
Diffstat (limited to 'daemons')
79 files changed, 3560 insertions, 3114 deletions
diff --git a/daemons/Makefile.am b/daemons/Makefile.am index 743320b..30dd17e 100644 --- a/daemons/Makefile.am +++ b/daemons/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2018 the Pacemaker project contributors +# Copyright 2018-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -8,4 +8,10 @@ # MAINTAINERCLEANFILES = Makefile.in -SUBDIRS = based schedulerd attrd controld execd fenced pacemakerd +SUBDIRS = based \ + schedulerd \ + attrd \ + controld \ + execd \ + fenced \ + pacemakerd diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am index 6bb81c4..f8d8bc9 100644 --- a/daemons/attrd/Makefile.am +++ b/daemons/attrd/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2004-2022 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -18,31 +18,32 @@ noinst_HEADERS = pacemaker-attrd.h pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/pengine/libpe_rules.la \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ - $(CLUSTERLIBS) +pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la +pacemaker_attrd_LDADD += $(top_builddir)/lib/cib/libcib.la +pacemaker_attrd_LDADD += $(top_builddir)/lib/pengine/libpe_rules.la +pacemaker_attrd_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la +pacemaker_attrd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_attrd_LDADD += $(CLUSTERLIBS) pacemaker_attrd_SOURCES = attrd_alerts.c \ - attrd_attributes.c \ - attrd_cib.c \ - attrd_corosync.c \ - attrd_elections.c \ - attrd_ipc.c \ - attrd_messages.c \ - attrd_sync.c \ - attrd_utils.c \ - pacemaker-attrd.c - -clean-generic: - rm -f *.log *.debug *.xml *~ - -if BUILD_LEGACY_LINKS + 
attrd_attributes.c \ + attrd_cib.c \ + attrd_corosync.c \ + attrd_elections.c \ + attrd_ipc.c \ + attrd_messages.c \ + attrd_sync.c \ + attrd_utils.c \ + pacemaker-attrd.c + +.PHONY: install-exec-hook install-exec-hook: +if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd && $(LN_S) pacemaker-attrd attrd +endif +.PHONY: uninstall-hook uninstall-hook: +if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd endif diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c index b694891..495e18f 100644 --- a/daemons/attrd/attrd_alerts.c +++ b/daemons/attrd/attrd_alerts.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2021 the Pacemaker project contributors + * Copyright 2015-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -14,6 +14,7 @@ #include <crm/cluster/internal.h> #include <crm/cluster/election_internal.h> #include <crm/common/alerts_internal.h> +#include <crm/common/cib_internal.h> #include <crm/pengine/rules_internal.h> #include <crm/lrmd_internal.h> #include "pacemaker-attrd.h" @@ -92,7 +93,7 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void } crmalerts = output; - if (crmalerts && !pcmk__str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, pcmk__str_none)) { + if ((crmalerts != NULL) && !pcmk__xe_is(crmalerts, XML_CIB_TAG_ALERTS)) { crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); } if (!crmalerts) { @@ -104,9 +105,6 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void attrd_alert_list = pe_unpack_alerts(crmalerts); } -#define XPATH_ALERTS \ - "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS - gboolean attrd_read_options(gpointer user_data) { @@ -114,8 +112,9 @@ attrd_read_options(gpointer user_data) CRM_CHECK(the_cib != NULL, return TRUE); - call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL, - cib_xpath | cib_scope_local); + call_id = 
the_cib->cmds->query(the_cib, + pcmk__cib_abs_xpath_for(XML_CIB_TAG_ALERTS), + NULL, cib_xpath|cib_scope_local); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, "config_query_callback", @@ -125,14 +124,6 @@ attrd_read_options(gpointer user_data) return TRUE; } -void -attrd_cib_updated_cb(const char *event, xmlNode * msg) -{ - if (!attrd_shutting_down() && pcmk__alert_in_patchset(msg, false)) { - mainloop_set_trigger(attrd_config_read); - } -} - int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value) diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index 516ced7..388c181 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -25,25 +25,45 @@ static attribute_t * attrd_create_attribute(xmlNode *xml) { + int is_private = 0; int dampen = 0; - const char *value = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); - attribute_t *a = calloc(1, sizeof(attribute_t)); + const char *name = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *set_type = crm_element_value(xml, PCMK__XA_ATTR_SET_TYPE); + const char *dampen_s = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); + attribute_t *a = NULL; + + if (set_type == NULL) { + set_type = XML_TAG_ATTR_SETS; + } + + /* Set type is meaningful only when writing to the CIB. Private + * attributes are not written. 
+ */ + crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private); + if ((is_private != 0) + && !pcmk__str_any_of(set_type, XML_TAG_ATTR_SETS, XML_TAG_UTILIZATION, + NULL)) { + crm_warn("Ignoring attribute %s with invalid set type %s", + pcmk__s(name, "(unidentified)"), set_type); + return NULL; + } + a = calloc(1, sizeof(attribute_t)); CRM_ASSERT(a != NULL); - a->id = crm_element_value_copy(xml, PCMK__XA_ATTR_NAME); - a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET); - a->set_type = crm_element_value_copy(xml, PCMK__XA_ATTR_SET_TYPE); - a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID); - a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); + a->is_private = is_private; + pcmk__str_update(&a->id, name); + pcmk__str_update(&a->set_type, set_type); - crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &a->is_private); + a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET); + a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID); + a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER); crm_trace("Performing all %s operations as user '%s'", a->id, a->user); - if (value != NULL) { - dampen = crm_get_msec(value); + if (dampen_s != NULL) { + dampen = crm_get_msec(dampen_s); } crm_trace("Created attribute %s with %s write delay", a->id, (a->timeout_ms == 0)? 
"no" : pcmk__readable_interval(a->timeout_ms)); @@ -52,7 +72,7 @@ attrd_create_attribute(xmlNode *xml) a->timeout_ms = dampen; a->timer = attrd_add_timer(a->id, a->timeout_ms, a); } else if (dampen < 0) { - crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); + crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id); } g_hash_table_replace(attributes, a->id, a); @@ -169,6 +189,10 @@ attrd_populate_attribute(xmlNode *xml, const char *attr) if (a == NULL) { if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) { a = attrd_create_attribute(xml); + if (a == NULL) { + return NULL; + } + } else { crm_warn("Could not update %s: attribute not found", attr); return NULL; diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 928c013..80e5580 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -10,6 +10,7 @@ #include <crm_internal.h> #include <errno.h> +#include <inttypes.h> // PRIu32 #include <stdbool.h> #include <stdlib.h> #include <glib.h> @@ -24,6 +25,188 @@ static int last_cib_op_done = 0; +static void write_attribute(attribute_t *a, bool ignore_delay); + +static void +attrd_cib_destroy_cb(gpointer user_data) +{ + cib_t *cib = user_data; + + cib->cmds->signoff(cib); + + if (attrd_shutting_down(false)) { + crm_info("Disconnected from the CIB manager"); + + } else { + // @TODO This should trigger a reconnect, not a shutdown + crm_crit("Lost connection to the CIB manager, shutting down"); + attrd_exit_status = CRM_EX_DISCONNECT; + attrd_shutdown(0); + } +} + +static void +attrd_cib_updated_cb(const char *event, xmlNode *msg) +{ + const xmlNode *patchset = NULL; + const char *client_name = NULL; + + if (attrd_shutting_down(true)) { + return; + } + + if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { + return; + } + + if (cib__element_in_patchset(patchset, XML_CIB_TAG_ALERTS)) { + mainloop_set_trigger(attrd_config_read); + } + + if (!attrd_election_won()) { + // Don't 
write attributes if we're not the writer + return; + } + + client_name = crm_element_value(msg, F_CIB_CLIENTNAME); + if (!cib__client_triggers_refresh(client_name)) { + // The CIB is still accurate + return; + } + + if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) + || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { + + /* An unsafe client modified the nodes or status section. Write + * transient attributes to ensure they're up-to-date in the CIB. + */ + if (client_name == NULL) { + client_name = crm_element_value(msg, F_CIB_CLIENTID); + } + crm_notice("Updating all attributes after %s event triggered by %s", + event, pcmk__s(client_name, "(unidentified client)")); + + attrd_write_attributes(attrd_write_all); + } +} + +int +attrd_cib_connect(int max_retry) +{ + static int attempts = 0; + + int rc = -ENOTCONN; + + the_cib = cib_new(); + if (the_cib == NULL) { + return -ENOTCONN; + } + + do { + if (attempts > 0) { + sleep(attempts); + } + attempts++; + crm_debug("Connection attempt %d to the CIB manager", attempts); + rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); + + } while ((rc != pcmk_ok) && (attempts < max_retry)); + + if (rc != pcmk_ok) { + crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + goto cleanup; + } + + crm_debug("Connected to the CIB manager after %d attempts", attempts); + + rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); + if (rc != pcmk_ok) { + crm_err("Could not set disconnection callback"); + goto cleanup; + } + + rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, + attrd_cib_updated_cb); + if (rc != pcmk_ok) { + crm_err("Could not set CIB notification callback"); + goto cleanup; + } + + return pcmk_ok; + +cleanup: + cib__clean_up_connection(&the_cib); + return -ENOTCONN; +} + +void +attrd_cib_disconnect(void) +{ + CRM_CHECK(the_cib != NULL, return); + the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, + 
attrd_cib_updated_cb); + cib__clean_up_connection(&the_cib); +} + +static void +attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG), + "Cleared transient attributes: %s " + CRM_XS " xpath=%s rc=%d", + pcmk_strerror(rc), (char *) user_data, rc); +} + +#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS + +/*! + * \internal + * \brief Wipe all transient attributes for this node from the CIB + * + * Clear any previous transient node attributes from the CIB. This is + * normally done by the DC's controller when this node leaves the cluster, but + * this handles the case where the node restarted so quickly that the + * cluster layer didn't notice. + * + * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), + * ideally we'd skip this and sync our attributes from the writer. + * However, currently we reject any values for us that the writer has, in + * attrd_peer_update(). + */ +static void +attrd_erase_attrs(void) +{ + int call_id = 0; + char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); + + crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", + xpath); + + call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, + "attrd_erase_cb", attrd_erase_cb, + free); +} + +/*! 
+ * \internal + * \brief Prepare the CIB after cluster is connected + */ +void +attrd_cib_init(void) +{ + // We have no attribute values in memory, wipe the CIB to match + attrd_erase_attrs(); + + // Set a trigger for reading the CIB (for the alerts section) + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); + + // Always read the CIB at start-up + mainloop_set_trigger(attrd_config_read); +} + static gboolean attribute_timer_cb(gpointer data) { @@ -92,7 +275,7 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ - attrd_write_attribute(a, false); + write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager @@ -121,48 +304,134 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use } } -static void -build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) +/*! 
+ * \internal + * \brief Add a set-attribute update request to the current CIB transaction + * + * \param[in] attr Attribute to update + * \param[in] attr_id ID of attribute to update + * \param[in] node_id ID of node for which to update attribute value + * \param[in] set_id ID of attribute set + * \param[in] value New value for attribute + * + * \return Standard Pacemaker return code + */ +static int +add_set_attr_update(const attribute_t *attr, const char *attr_id, + const char *node_id, const char *set_id, const char *value) { - const char *set = NULL; - xmlNode *xml_obj = NULL; + xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATE); + xmlNode *child = update; + int rc = ENOMEM; - xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); - crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + if (child == NULL) { + goto done; + } + crm_xml_add(child, XML_ATTR_ID, node_id); - xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); - crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + child = create_xml_node(child, XML_TAG_TRANSIENT_NODEATTRS); + if (child == NULL) { + goto done; + } + crm_xml_add(child, XML_ATTR_ID, node_id); - if (pcmk__str_eq(a->set_type, XML_TAG_ATTR_SETS, pcmk__str_null_matches)) { - xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); - } else if (pcmk__str_eq(a->set_type, XML_TAG_UTILIZATION, pcmk__str_none)) { - xml_obj = create_xml_node(xml_obj, XML_TAG_UTILIZATION); - } else { - crm_err("Unknown set type attribute: %s", a->set_type); + child = create_xml_node(child, attr->set_type); + if (child == NULL) { + goto done; } + crm_xml_add(child, XML_ATTR_ID, set_id); - if (a->set_id) { - crm_xml_set_id(xml_obj, "%s", a->set_id); - } else { - crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid); + child = create_xml_node(child, XML_CIB_TAG_NVPAIR); + if (child == NULL) { + goto done; } - set = ID(xml_obj); + crm_xml_add(child, XML_ATTR_ID, attr_id); + crm_xml_add(child, XML_NVPAIR_ATTR_NAME, attr->id); + crm_xml_add(child, 
XML_NVPAIR_ATTR_VALUE, value); + + rc = the_cib->cmds->modify(the_cib, XML_CIB_TAG_STATUS, update, + cib_can_create|cib_transaction); + rc = pcmk_legacy2rc(rc); + +done: + free_xml(update); + return rc; +} + +/*! + * \internal + * \brief Add an unset-attribute update request to the current CIB transaction + * + * \param[in] attr Attribute to update + * \param[in] attr_id ID of attribute to update + * \param[in] node_id ID of node for which to update attribute value + * \param[in] set_id ID of attribute set + * + * \return Standard Pacemaker return code + */ +static int +add_unset_attr_update(const attribute_t *attr, const char *attr_id, + const char *node_id, const char *set_id) +{ + char *xpath = crm_strdup_printf("/" XML_TAG_CIB + "/" XML_CIB_TAG_STATUS + "/" XML_CIB_TAG_STATE + "[@" XML_ATTR_ID "='%s']" + "/" XML_TAG_TRANSIENT_NODEATTRS + "[@" XML_ATTR_ID "='%s']" + "/%s[@" XML_ATTR_ID "='%s']" + "/" XML_CIB_TAG_NVPAIR + "[@" XML_ATTR_ID "='%s' " + "and @" XML_NVPAIR_ATTR_NAME "='%s']", + node_id, node_id, attr->set_type, set_id, + attr_id, attr->id); + + int rc = the_cib->cmds->remove(the_cib, xpath, NULL, + cib_xpath|cib_transaction); + + free(xpath); + return pcmk_legacy2rc(rc); +} + +/*! 
+ * \internal + * \brief Add an attribute update request to the current CIB transaction + * + * \param[in] attr Attribute to update + * \param[in] value New value for attribute + * \param[in] node_id ID of node for which to update attribute value + * + * \return Standard Pacemaker return code + */ +static int +add_attr_update(const attribute_t *attr, const char *value, const char *node_id) +{ + char *set_id = NULL; + char *attr_id = NULL; + int rc = pcmk_rc_ok; - xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); - if (a->uuid) { - crm_xml_set_id(xml_obj, "%s", a->uuid); + if (attr->set_id != NULL) { + pcmk__str_update(&set_id, attr->set_id); } else { - crm_xml_set_id(xml_obj, "%s-%s", set, a->id); + set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id); } - crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); + crm_xml_sanitize_id(set_id); - if(value) { - crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); + if (attr->uuid != NULL) { + pcmk__str_update(&attr_id, attr->uuid); + } else { + attr_id = crm_strdup_printf("%s-%s", set_id, attr->id); + } + crm_xml_sanitize_id(attr_id); + if (value != NULL) { + rc = add_set_attr_update(attr, attr_id, node_id, set_id, value); } else { - crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); - crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); + rc = add_unset_attr_update(attr, attr_id, node_id, set_id); } + free(set_id); + free(attr_id); + return rc; } static void @@ -202,15 +471,22 @@ attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } -void -attrd_write_attribute(attribute_t *a, bool ignore_delay) +/*! 
+ * \internal + * \brief Write an attribute's values to the CIB if appropriate + * + * \param[in,out] a Attribute to write + * \param[in] ignore_delay If true, write attribute now regardless of any + * configured delay + */ +static void +write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; - xmlNode *xml_top = NULL; attribute_value_t *v = NULL; GHashTableIter iter; - enum cib_call_options flags = cib_none; GHashTable *alert_attribute_value = NULL; + int rc = pcmk_ok; if (a == NULL) { return; @@ -218,32 +494,37 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) /* If this attribute will be written to the CIB ... */ if (!stand_alone && !a->is_private) { - /* Defer the write if now's not a good time */ - CRM_CHECK(the_cib != NULL, return); if (a->update && (a->update < last_cib_op_done)) { - crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); + crm_info("Write out of '%s' continuing: update %d considered lost", + a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { - crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); - return; + crm_info("Write out of '%s' delayed: update %d in progress", + a->id, a->update); + goto done; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { - /* 'refresh' forces a write of the current value of all attributes - * Cancel any existing timers, we're writing it NOW - */ mainloop_timer_stop(a->timer); - crm_debug("Write out of '%s': timer is running but ignore delay", a->id); + crm_debug("Overriding '%s' write delay", a->id); } else { - crm_info("Write out of '%s' delayed: timer is running", a->id); - return; + crm_info("Delaying write of '%s'", a->id); + goto done; } } - /* Initialize the status update XML */ - xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); + // Initiate a transaction for all the peer value updates + CRM_CHECK(the_cib != NULL, goto done); + 
the_cib->cmds->set_user(the_cib, a->user); + rc = the_cib->cmds->init_transaction(the_cib); + if (rc != pcmk_ok) { + crm_err("Failed to write %s (id %s, set %s): Could not initiate " + "CIB transaction", + a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); + goto done; + } } /* Attribute will be written shortly, so clear changed flag */ @@ -256,12 +537,14 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) a->force_write = FALSE; /* Make the table for the attribute trap */ - alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); + alert_attribute_value = pcmk__strikey_table(NULL, + attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { - crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { + crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, + CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { @@ -291,11 +574,20 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) continue; } - /* Add this value to status update XML */ - crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID %u/%u)", + // Update this value as part of the CIB transaction we're building + rc = add_attr_update(a, v->current, peer->uuid); + if (rc != pcmk_rc_ok) { + crm_err("Failed to update %s[%s]=%s (peer known as %s, UUID %s, " + "ID %" PRIu32 "/%" PRIu32 "): %s", + a->id, v->nodename, v->current, peer->uname, peer->uuid, + peer->id, v->nodeid, pcmk_rc_str(rc)); + continue; + } + + crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID " + "%" PRIu32 "/%" PRIu32 ")", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid); - build_update_element(xml_top, a, peer->uuid, v->current); cib_updates++; /* Preservation of the 
attribute to transmit alert */ @@ -305,12 +597,6 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) v->requested = NULL; if (v->current) { v->requested = strdup(v->current); - } else { - /* Older attrd versions don't know about the cib_mixed_update - * flag so make sure it goes to the local cib which does - */ - cib__set_call_options(flags, crm_system_name, - cib_mixed_update|cib_scope_local); } } @@ -319,40 +605,55 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); } - if (cib_updates) { - crm_log_xml_trace(xml_top, __func__); + if (cib_updates > 0) { + char *id = NULL; - a->update = cib_internal_op(the_cib, PCMK__CIB_REQUEST_MODIFY, NULL, - XML_CIB_TAG_STATUS, xml_top, NULL, flags, - a->user); + // Commit transaction + a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); - the_cib->cmds->register_callback_full(the_cib, a->update, - CIB_OP_TIMEOUT_S, FALSE, - strdup(a->id), - "attrd_cib_callback", - attrd_cib_callback, free); - /* Transmit alert of the attribute */ - send_alert_attributes_value(a, alert_attribute_value); + pcmk__str_update(&id, a->id); + if (the_cib->cmds->register_callback_full(the_cib, a->update, + CIB_OP_TIMEOUT_S, FALSE, id, + "attrd_cib_callback", + attrd_cib_callback, free)) { + // Transmit alert of the attribute + send_alert_attributes_value(a, alert_attribute_value); + } } - g_hash_table_destroy(alert_attribute_value); - free_xml(xml_top); +done: + // Discard transaction (if any) + if (the_cib != NULL) { + the_cib->cmds->end_transaction(the_cib, false, cib_none); + the_cib->cmds->set_user(the_cib, NULL); + } + + if (alert_attribute_value != NULL) { + g_hash_table_destroy(alert_attribute_value); + } } +/*! 
+ * \internal + * \brief Write out attributes + * + * \param[in] options Group of enum attrd_write_options + */ void -attrd_write_attributes(bool all, bool ignore_delay) +attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; - crm_debug("Writing out %s attributes", all? "all" : "changed"); + crm_debug("Writing out %s attributes", + pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { - if (!all && a->unknown_peer_uuids) { + if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { // Try writing this attribute again, in case peer ID was learned a->changed = true; } else if (a->force_write) { @@ -360,9 +661,14 @@ attrd_write_attributes(bool all, bool ignore_delay) a->changed = true; } - if(all || a->changed) { - /* When forced write flag is set, ignore delay. */ - attrd_write_attribute(a, (a->force_write ? true : ignore_delay)); + if (pcmk_is_set(options, attrd_write_all) || a->changed) { + bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); + + if (a->force_write) { + // Always ignore delay when forced write flag is set + ignore_delay = true; + } + write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } @@ -373,7 +679,7 @@ void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { - attrd_write_attribute(a, false); + write_attribute(a, false); } else { attrd_start_election_if_needed(); } diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index ef205e6..86dc67b 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -23,8 +23,6 @@ #include "pacemaker-attrd.h" -extern crm_exit_t attrd_exit_status; - static xmlNode * attrd_confirmation(int callid) { @@ -48,7 +46,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) return; } - if (attrd_shutting_down()) { + if 
(attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. @@ -133,11 +131,11 @@ attrd_cpg_dispatch(cpg_handle_t handle, static void attrd_cpg_destroy(gpointer unused) { - if (attrd_shutting_down()) { - crm_info("Corosync disconnection complete"); + if (attrd_shutting_down(false)) { + crm_info("Disconnected from Corosync process group"); } else { - crm_crit("Lost connection to cluster layer, shutting down"); + crm_crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } @@ -180,7 +178,7 @@ cache_remote_node(const char *node_name) /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ - crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); + crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name, NULL); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, node_name); @@ -285,7 +283,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host) crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { - attrd_write_attributes(false, false); + attrd_write_attributes(attrd_write_changed); } } @@ -476,9 +474,7 @@ attrd_peer_clear_failure(pcmk__request_t *request) crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ - if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { - crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); - } + xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { @@ -591,7 +587,8 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, { bool handle_sync_point = false; - if (xml_has_children(xml)) { + CRM_CHECK((peer != NULL) && (xml != NULL), return); + 
if (xml->children != NULL) { for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attrd_copy_xml_attributes(xml, child); diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c index 3b6b55a..82fbe8a 100644 --- a/daemons/attrd/attrd_elections.c +++ b/daemons/attrd/attrd_elections.c @@ -1,5 +1,5 @@ /* - * Copyright 2013-2022 the Pacemaker project contributors + * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -25,9 +25,11 @@ attrd_election_cb(gpointer user_data) /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); - /* Update the CIB after an election */ - attrd_write_attributes(true, false); - return FALSE; + /* After winning an election, update the CIB with the values of all + * attributes as the winner knows them. + */ + attrd_write_attributes(attrd_write_all); + return G_SOURCE_REMOVE; } void @@ -48,7 +50,7 @@ attrd_start_election_if_needed(void) { if ((peer_writer == NULL) && (election_state(writer) != election_in_progress) - && !attrd_shutting_down()) { + && !attrd_shutting_down(false)) { crm_info("Starting an election to determine the writer"); election_vote(writer); @@ -70,7 +72,7 @@ attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml) crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); // Don't become writer if we're shutting down - rc = election_count_vote(writer, xml, !attrd_shutting_down()); + rc = election_count_vote(writer, xml, !attrd_shutting_down(false)); switch(rc) { case election_start: diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 9d3dfff..05c4a69 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -140,12 +140,8 @@ attrd_client_clear_failure(pcmk__request_t *request) } /* Make sure attribute and value are not set, so we delete via regex */ - if (crm_element_value(xml, PCMK__XA_ATTR_NAME)) { - crm_xml_replace(xml, 
PCMK__XA_ATTR_NAME, NULL); - } - if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { - crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); - } + xml_remove_prop(xml, PCMK__XA_ATTR_NAME); + xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); return attrd_client_update(request); } @@ -166,7 +162,8 @@ attrd_client_peer_remove(pcmk__request_t *request) crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); if (nodeid > 0) { - crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL); + crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL, + NULL); char *host_alloc = NULL; if (node && node->uname) { @@ -235,7 +232,7 @@ attrd_client_refresh(pcmk__request_t *request) crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); - attrd_write_attributes(true, true); + attrd_write_attributes(attrd_write_all|attrd_write_no_delay); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; @@ -282,7 +279,7 @@ expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *re * regex and replace it with the name. */ attrd_copy_xml_attributes(xml, child); - crm_xml_replace(child, PCMK__XA_ATTR_PATTERN, NULL); + xml_remove_prop(child, PCMK__XA_ATTR_PATTERN); crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); } } @@ -401,14 +398,18 @@ send_child_update(xmlNode *child, void *data) xmlNode * attrd_client_update(pcmk__request_t *request) { - xmlNode *xml = request->xml; + xmlNode *xml = NULL; const char *attr, *value, *regex; + CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL); + + xml = request->xml; + /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. 
*/ - if (xml_has_children(xml)) { + if (xml->children != NULL) { if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. However, @@ -494,7 +495,7 @@ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); - if (attrd_shutting_down()) { + if (attrd_shutting_down(false)) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -EPERM; diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c index 184176a..89da6d8 100644 --- a/daemons/attrd/attrd_messages.c +++ b/daemons/attrd/attrd_messages.c @@ -20,6 +20,36 @@ int minimum_protocol_version = -1; static GHashTable *attrd_handlers = NULL; +static bool +is_sync_point_attr(xmlAttrPtr attr, void *data) +{ + return pcmk__str_eq((const char *) attr->name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none); +} + +static int +remove_sync_point_attribute(xmlNode *xml, void *data) +{ + pcmk__xe_remove_matching_attrs(xml, is_sync_point_attr, NULL); + pcmk__xe_foreach_child(xml, XML_ATTR_OP, remove_sync_point_attribute, NULL); + return pcmk_rc_ok; +} + +/* Sync points on a multi-update IPC message to an attrd too old to support + * multi-update messages won't work. Strip the sync point attribute off here + * so we don't pretend to support this situation and instead ACK the client + * immediately. 
+ */ +static void +remove_unsupported_sync_points(pcmk__request_t *request) +{ + if (request->xml->children != NULL && !ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version) && + attrd_request_has_sync_point(request->xml)) { + crm_warn("Ignoring sync point in request from %s because not all nodes support it", + pcmk__request_origin(request)); + remove_sync_point_attribute(request->xml, NULL); + } +} + static xmlNode * handle_unknown_request(pcmk__request_t *request) { @@ -42,6 +72,8 @@ handle_clear_failure_request(pcmk__request_t *request) pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { + remove_unsupported_sync_points(request); + if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until @@ -180,6 +212,8 @@ handle_update_request(pcmk__request_t *request) return NULL; } else { + remove_unsupported_sync_points(request); + if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c index d59ddd5..1a6c24c 100644 --- a/daemons/attrd/attrd_sync.c +++ b/daemons/attrd/attrd_sync.c @@ -313,7 +313,9 @@ attrd_cluster_sync_point_update(xmlNode *xml) const char * attrd_request_sync_point(xmlNode *xml) { - if (xml_has_children(xml)) { + CRM_CHECK(xml != NULL, return NULL); + + if (xml->children != NULL) { xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); if (child) { diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index 7de8dd9..341ee1a 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -56,26 +56,22 @@ attrd_clear_requesting_shutdown(void) /*! 
* \internal - * \brief Check whether we're currently requesting shutdown + * \brief Check whether local attribute manager is shutting down * - * \return true if requesting shutdown, false otherwise - */ -bool -attrd_requesting_shutdown(void) -{ - return requesting_shutdown; -} - -/*! - * \internal - * \brief Check whether we're currently shutting down + * \param[in] if_requested Also consider presence of "shutdown" attribute * - * \return true if shutting down, false otherwise + * \return \c true if local attribute manager has begun shutdown sequence + * or (if \p if_requested is \c true) whether local node has a nonzero + * "shutdown" attribute set, otherwise \c false + * \note Most callers should pass \c false for \p if_requested, because the + * attribute manager needs to continue performing while the controller is + * shutting down, and even needs to be eligible for election in case all + * nodes are shutting down. */ bool -attrd_shutting_down(void) +attrd_shutting_down(bool if_requested) { - return shutting_down; + return shutting_down || (if_requested && requesting_shutdown); } /*! 
@@ -137,39 +133,6 @@ attrd_run_mainloop(void) g_main_loop_run(mloop); } -void -attrd_cib_disconnect(void) -{ - CRM_CHECK(the_cib != NULL, return); - the_cib->cmds->del_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); - the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); - cib__clean_up_connection(&the_cib); -} - -void -attrd_cib_replaced_cb(const char *event, xmlNode * msg) -{ - int change_section = cib_change_section_nodes | cib_change_section_status | cib_change_section_alerts; - - if (attrd_requesting_shutdown() || attrd_shutting_down()) { - return; - } - - crm_element_value_int(msg, F_CIB_CHANGE_SECTION, &change_section); - - if (attrd_election_won()) { - if (change_section & (cib_change_section_nodes | cib_change_section_status)) { - crm_notice("Updating all attributes after %s event", event); - attrd_write_attributes(true, false); - } - } - - if (change_section & cib_change_section_alerts) { - // Check for changes in alerts - mainloop_set_trigger(attrd_config_read); - } -} - /* strlen("value") */ #define plus_plus_len (5) diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index 037825b..8091c5b 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -63,140 +63,6 @@ crm_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; crm_exit_t attrd_exit_status = CRM_EX_OK; -static void -attrd_cib_destroy_cb(gpointer user_data) -{ - cib_t *conn = user_data; - - conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ - - if (attrd_shutting_down()) { - crm_info("Connection disconnection complete"); - - } else { - /* eventually this should trigger a reconnect, not a shutdown */ - crm_crit("Lost connection to the CIB manager, shutting down"); - attrd_exit_status = CRM_EX_DISCONNECT; - attrd_shutdown(0); - } - - return; -} - -static void -attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, - void *user_data) -{ - 
do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), - "Cleared transient attributes: %s " - CRM_XS " xpath=%s rc=%d", - pcmk_strerror(rc), (char *) user_data, rc); -} - -#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS - -/*! - * \internal - * \brief Wipe all transient attributes for this node from the CIB - * - * Clear any previous transient node attributes from the CIB. This is - * normally done by the DC's controller when this node leaves the cluster, but - * this handles the case where the node restarted so quickly that the - * cluster layer didn't notice. - * - * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), - * ideally we'd skip this and sync our attributes from the writer. - * However, currently we reject any values for us that the writer has, in - * attrd_peer_update(). - */ -static void -attrd_erase_attrs(void) -{ - int call_id; - char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); - - crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", - xpath); - - call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); - the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, - "attrd_erase_cb", attrd_erase_cb, - free); -} - -static int -attrd_cib_connect(int max_retry) -{ - static int attempts = 0; - - int rc = -ENOTCONN; - - the_cib = cib_new(); - if (the_cib == NULL) { - return -ENOTCONN; - } - - do { - if(attempts > 0) { - sleep(attempts); - } - - attempts++; - crm_debug("Connection attempt %d to the CIB manager", attempts); - rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); - - } while(rc != pcmk_ok && attempts < max_retry); - - if (rc != pcmk_ok) { - crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", - pcmk_strerror(rc), rc); - goto cleanup; - } - - crm_debug("Connected to the CIB manager after %d attempts", attempts); - - rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); - if (rc != 
pcmk_ok) { - crm_err("Could not set disconnection callback"); - goto cleanup; - } - - rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); - if(rc != pcmk_ok) { - crm_err("Could not set CIB notification callback"); - goto cleanup; - } - - rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); - if (rc != pcmk_ok) { - crm_err("Could not set CIB notification callback (update)"); - goto cleanup; - } - - return pcmk_ok; - - cleanup: - cib__clean_up_connection(&the_cib); - return -ENOTCONN; -} - -/*! - * \internal - * \brief Prepare the CIB after cluster is connected - */ -static void -attrd_cib_init(void) -{ - // We have no attribute values in memory, wipe the CIB to match - attrd_erase_attrs(); - - // Set a trigger for reading the CIB (for the alerts section) - attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); - - // Always read the CIB at start-up - mainloop_set_trigger(attrd_config_read); -} - static bool ipc_already_running(void) { @@ -208,8 +74,10 @@ ipc_already_running(void) return false; } - rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); if (rc != pcmk_rc_ok) { + crm_debug("No existing %s manager instance found: %s", + pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); pcmk_free_ipc_api(old_instance); return false; } @@ -277,7 +145,7 @@ main(int argc, char **argv) attrd_exit_status = CRM_EX_OK; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg); - crm_err(msg); + crm_err("%s", msg); goto done; } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 329fb5a..b8929a7 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -57,13 +57,14 @@ void attrd_run_mainloop(void); void attrd_set_requesting_shutdown(void); void attrd_clear_requesting_shutdown(void); void attrd_free_waitlist(void); -bool 
attrd_requesting_shutdown(void); -bool attrd_shutting_down(void); +bool attrd_shutting_down(bool if_requested); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); +int attrd_cib_connect(int max_retry); void attrd_cib_disconnect(void); +void attrd_cib_init(void); bool attrd_value_needs_expansion(const char *value); int attrd_expand_value(const char *value, const char *old_value); @@ -92,6 +93,7 @@ int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms); extern cib_t *the_cib; +extern crm_exit_t attrd_exit_status; /* Alerts */ @@ -100,8 +102,6 @@ extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); -void attrd_cib_replaced_cb(const char *event, xmlNode * msg); -void attrd_cib_updated_cb(const char *event, xmlNode *msg); int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value); @@ -177,8 +177,13 @@ void attrd_free_attribute(gpointer data); void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); -void attrd_write_attribute(attribute_t *a, bool ignore_delay); -void attrd_write_attributes(bool all, bool ignore_delay); +enum attrd_write_options { + attrd_write_changed = 0, + attrd_write_all = (1 << 0), + attrd_write_no_delay = (1 << 1), +}; + +void attrd_write_attributes(uint32_t options); void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; diff --git a/daemons/based/Makefile.am b/daemons/based/Makefile.am index 053d93c..022fc47 100644 --- a/daemons/based/Makefile.am +++ b/daemons/based/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2004-2021 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
# @@ -13,35 +13,37 @@ EXTRA_DIST = cib.pam halibdir = $(CRM_DAEMON_DIR) -COMMONLIBS = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la - halib_PROGRAMS = pacemaker-based -noinst_HEADERS = pacemaker-based.h +noinst_HEADERS = based_transaction.h \ + pacemaker-based.h pacemaker_based_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_based_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_based_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(COMMONLIBS) $(CLUSTERLIBS) - -pacemaker_based_SOURCES = pacemaker-based.c \ - based_callbacks.c \ - based_common.c \ - based_io.c \ - based_messages.c \ - based_notify.c \ - based_remote.c - -clean-generic: - rm -f *.log *.debug *.xml *~ - -if BUILD_LEGACY_LINKS +pacemaker_based_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la +pacemaker_based_LDADD += $(top_builddir)/lib/cib/libcib.la +pacemaker_based_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_based_LDADD += $(CLUSTERLIBS) + +pacemaker_based_SOURCES = pacemaker-based.c \ + based_callbacks.c \ + based_io.c \ + based_messages.c \ + based_notify.c \ + based_operation.c \ + based_remote.c \ + based_transaction.c + +.PHONY: install-exec-hook install-exec-hook: +if BUILD_LEGACY_LINKS $(MKDIR_P) -- $(DESTDIR)$(CRM_DAEMON_DIR) cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib && $(LN_S) pacemaker-based cib +endif +.PHONY: uninstall-hook uninstall-hook: +if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f cib endif diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 3726caa..4fac222 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -20,6 +20,9 @@ #include <fcntl.h> #include <inttypes.h> // PRIu64 +#include <glib.h> +#include <libxml/tree.h> + #include <crm/crm.h> #include <crm/cib.h> #include <crm/msg_xml.h> @@ -31,7 +34,6 @@ #include <pacemaker-based.h> #define EXIT_ESCALATION_MS 10000 -#define OUR_NODENAME (stand_alone? 
"localhost" : crm_cluster->uname) static unsigned long cib_local_bcast_num = 0; @@ -50,11 +52,10 @@ qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; -static void cib_process_request(xmlNode *request, gboolean privileged, - const pcmk__client_t *cib_client); - -static int cib_process_command(xmlNode *request, xmlNode **reply, - xmlNode **cib_diff, gboolean privileged); +static int cib_process_command(xmlNode *request, + const cib__operation_t *operation, + cib__op_fn_t op_function, xmlNode **reply, + xmlNode **cib_diff, bool privileged); static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); @@ -138,11 +139,130 @@ struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_destroyed = cib_ipc_destroy }; +/*! + * \internal + * \brief Create reply XML for a CIB request + * + * \param[in] op CIB operation type + * \param[in] call_id CIB call ID + * \param[in] client_id CIB client ID + * \param[in] call_options Group of <tt>enum cib_call_options</tt> flags + * \param[in] rc Request return code + * \param[in] call_data Request output data + * + * \return Reply XML + * + * \note The caller is responsible for freeing the return value using + * \p free_xml(). 
+ */ +static xmlNode * +create_cib_reply(const char *op, const char *call_id, const char *client_id, + int call_options, int rc, xmlNode *call_data) +{ + xmlNode *reply = create_xml_node(NULL, "cib-reply"); + + CRM_ASSERT(reply != NULL); + + crm_xml_add(reply, F_TYPE, T_CIB); + crm_xml_add(reply, F_CIB_OPERATION, op); + crm_xml_add(reply, F_CIB_CALLID, call_id); + crm_xml_add(reply, F_CIB_CLIENTID, client_id); + crm_xml_add_int(reply, F_CIB_CALLOPTS, call_options); + crm_xml_add_int(reply, F_CIB_RC, rc); + + if (call_data != NULL) { + crm_trace("Attaching reply output"); + add_message_xml(reply, F_CIB_CALLDATA, call_data); + } + + crm_log_xml_explicit(reply, "cib:reply"); + return reply; +} + +static void +do_local_notify(const xmlNode *notify_src, const char *client_id, + bool sync_reply, bool from_peer) +{ + int rid = 0; + int call_id = 0; + pcmk__client_t *client_obj = NULL; + + CRM_ASSERT(notify_src && client_id); + + crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); + + client_obj = pcmk__find_client_by_id(client_id); + if (client_obj == NULL) { + crm_debug("Could not send response %d: client %s not found", + call_id, client_id); + return; + } + + if (sync_reply) { + if (client_obj->ipcs) { + CRM_LOG_ASSERT(client_obj->request_id); + + rid = client_obj->request_id; + client_obj->request_id = 0; + + crm_trace("Sending response %d to client %s%s", + rid, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } else { + crm_trace("Sending response (call %d) to client %s%s", + call_id, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } + + } else { + crm_trace("Sending event %d to client %s%s", + call_id, pcmk__client_name(client_obj), + (from_peer? " (originator of delegated request)" : "")); + } + + switch (PCMK__CLIENT_TYPE(client_obj)) { + case pcmk__client_ipc: + { + int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, + (sync_reply? 
crm_ipc_flags_none + : crm_ipc_server_event)); + + if (rc != pcmk_rc_ok) { + crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", + (sync_reply? "Synchronous" : "Asynchronous"), + pcmk__client_name(client_obj), pcmk_rc_str(rc), + rc); + } + } + break; +#ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: +#endif + case pcmk__client_tcp: + pcmk__remote_send_xml(client_obj->remote, notify_src); + break; + default: + crm_err("Unknown transport for client %s " + CRM_XS " flags=%#016" PRIx64, + pcmk__client_name(client_obj), client_obj->flags); + } +} + void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, pcmk__client_t *cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); + int call_options = cib_none; + + crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); + + /* Requests with cib_transaction set should not be sent to based directly + * (outside of a commit-transaction request) + */ + if (pcmk_is_set(call_options, cib_transaction)) { + return; + } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { if (flags & crm_ipc_client_response) { @@ -180,9 +300,6 @@ cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, } else if (pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) { bit = cib_notify_diff; - } else if (pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { - bit = cib_notify_replace; - } else { status = CRM_EX_INVALID_PARAM; } @@ -354,9 +471,7 @@ process_ping_reply(xmlNode *reply) if(remote_cib && remote_cib->children) { // Additional debug xml_calculate_changes(the_cib, remote_cib); - - pcmk__output_set_log_level(logger_out, LOG_INFO); - pcmk__xml_show_changes(logger_out, remote_cib); + pcmk__log_xml_changes(LOG_INFO, remote_cib); crm_trace("End of differences"); } @@ -367,75 +482,6 @@ process_ping_reply(xmlNode *reply) } static void -do_local_notify(xmlNode * notify_src, const char *client_id, - gboolean sync_reply, 
gboolean from_peer) -{ - int rid = 0; - int call_id = 0; - pcmk__client_t *client_obj = NULL; - - CRM_ASSERT(notify_src && client_id); - - crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); - - client_obj = pcmk__find_client_by_id(client_id); - if (client_obj == NULL) { - crm_debug("Could not send response %d: client %s not found", - call_id, client_id); - return; - } - - if (sync_reply) { - if (client_obj->ipcs) { - CRM_LOG_ASSERT(client_obj->request_id); - - rid = client_obj->request_id; - client_obj->request_id = 0; - - crm_trace("Sending response %d to client %s%s", - rid, pcmk__client_name(client_obj), - (from_peer? " (originator of delegated request)" : "")); - } else { - crm_trace("Sending response (call %d) to client %s%s", - call_id, pcmk__client_name(client_obj), - (from_peer? " (originator of delegated request)" : "")); - } - - } else { - crm_trace("Sending event %d to client %s%s", - call_id, pcmk__client_name(client_obj), - (from_peer? " (originator of delegated request)" : "")); - } - - switch (PCMK__CLIENT_TYPE(client_obj)) { - case pcmk__client_ipc: - { - int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, - (sync_reply? crm_ipc_flags_none - : crm_ipc_server_event)); - - if (rc != pcmk_rc_ok) { - crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", - (sync_reply? 
"Synchronous" : "Asynchronous"), - pcmk__client_name(client_obj), pcmk_rc_str(rc), - rc); - } - } - break; -#ifdef HAVE_GNUTLS_GNUTLS_H - case pcmk__client_tls: -#endif - case pcmk__client_tcp: - pcmk__remote_send_xml(client_obj->remote, notify_src); - break; - default: - crm_err("Unknown transport for client %s " - CRM_XS " flags=%#016" PRIx64, - pcmk__client_name(client_obj), client_obj->flags); - } -} - -static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; @@ -448,7 +494,7 @@ local_notify_destroy_callback(gpointer data) static void check_local_notify(int bcast_id) { - cib_local_notify_t *notify = NULL; + const cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; @@ -483,13 +529,14 @@ queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_re } static void -parse_local_options_v1(const pcmk__client_t *cib_client, int call_type, - int call_options, const char *host, const char *op, - gboolean *local_notify, gboolean *needs_reply, - gboolean *process, gboolean *needs_forward) +parse_local_options_v1(const pcmk__client_t *cib_client, + const cib__operation_t *operation, int call_options, + const char *host, const char *op, gboolean *local_notify, + gboolean *needs_reply, gboolean *process, + gboolean *needs_forward) { - if (cib_op_modifies(call_type) - && !(call_options & cib_inhibit_bcast)) { + if (pcmk_is_set(operation->flags, cib__op_attr_modifies) + && !pcmk_is_set(call_options, cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { @@ -526,78 +573,87 @@ parse_local_options_v1(const pcmk__client_t *cib_client, int call_type, } static void -parse_local_options_v2(const pcmk__client_t *cib_client, int call_type, - int call_options, const char *host, const char *op, - gboolean *local_notify, gboolean *needs_reply, - gboolean *process, gboolean *needs_forward) +parse_local_options_v2(const pcmk__client_t *cib_client, + const cib__operation_t 
*operation, int call_options, + const char *host, const char *op, gboolean *local_notify, + gboolean *needs_reply, gboolean *process, + gboolean *needs_forward) { - if (cib_op_modifies(call_type)) { - if (pcmk__str_any_of(op, PCMK__CIB_REQUEST_PRIMARY, - PCMK__CIB_REQUEST_SECONDARY, NULL)) { - /* Always handle these locally */ - *process = TRUE; - *needs_reply = FALSE; - *local_notify = TRUE; - *needs_forward = FALSE; - return; - - } else { - /* Redirect all other updates via CPG */ - *needs_reply = TRUE; - *needs_forward = TRUE; - *process = FALSE; - crm_trace("%s op from %s needs to be forwarded to client %s", - op, pcmk__client_name(cib_client), - pcmk__s(host, "the primary instance")); - return; - } - } - - + // Process locally and notify local client *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; - if (stand_alone) { - crm_trace("Processing %s op from client %s (stand-alone)", + if (pcmk_is_set(operation->flags, cib__op_attr_local)) { + /* Always process locally if cib__op_attr_local is set. + * + * @COMPAT: Currently host is ignored. At a compatibility break, throw + * an error (from cib_process_request() or earlier) if host is not NULL or + * OUR_NODENAME. 
+ */ + crm_trace("Processing always-local %s op from client %s", op, pcmk__client_name(cib_client)); - } else if (host == NULL) { - crm_trace("Processing unaddressed %s op from client %s", - op, pcmk__client_name(cib_client)); + if (!pcmk__str_eq(host, OUR_NODENAME, + pcmk__str_casei|pcmk__str_null_matches)) { - } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { - crm_trace("Processing locally addressed %s op from client %s", + crm_warn("Operation '%s' is always local but its target host is " + "set to '%s'", + op, host); + } + return; + } + + if (pcmk_is_set(operation->flags, cib__op_attr_modifies) + || !pcmk__str_eq(host, OUR_NODENAME, + pcmk__str_casei|pcmk__str_null_matches)) { + + // Forward modifying and non-local requests via cluster + *process = FALSE; + *needs_reply = FALSE; + *local_notify = FALSE; + *needs_forward = TRUE; + + crm_trace("%s op from %s needs to be forwarded to %s", + op, pcmk__client_name(cib_client), + pcmk__s(host, "all nodes")); + return; + } + + if (stand_alone) { + crm_trace("Processing %s op from client %s (stand-alone)", op, pcmk__client_name(cib_client)); } else { - crm_trace("%s op from %s needs to be forwarded to client %s", - op, pcmk__client_name(cib_client), host); - *needs_forward = TRUE; - *process = FALSE; + crm_trace("Processing %saddressed %s op from client %s", + ((host != NULL)? 
"locally " : "un"), + op, pcmk__client_name(cib_client)); } } static void -parse_local_options(const pcmk__client_t *cib_client, int call_type, - int call_options, const char *host, const char *op, - gboolean *local_notify, gboolean *needs_reply, - gboolean *process, gboolean *needs_forward) +parse_local_options(const pcmk__client_t *cib_client, + const cib__operation_t *operation, int call_options, + const char *host, const char *op, gboolean *local_notify, + gboolean *needs_reply, gboolean *process, + gboolean *needs_forward) { if(cib_legacy_mode()) { - parse_local_options_v1(cib_client, call_type, call_options, host, - op, local_notify, needs_reply, process, needs_forward); + parse_local_options_v1(cib_client, operation, call_options, host, + op, local_notify, needs_reply, process, + needs_forward); } else { - parse_local_options_v2(cib_client, call_type, call_options, host, - op, local_notify, needs_reply, process, needs_forward); + parse_local_options_v2(cib_client, operation, call_options, host, + op, local_notify, needs_reply, process, + needs_forward); } } static gboolean -parse_peer_options_v1(int call_type, xmlNode * request, - gboolean * local_notify, gboolean * needs_reply, gboolean * process, - gboolean * needs_forward) +parse_peer_options_v1(const cib__operation_t *operation, xmlNode *request, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process) { const char *op = NULL; const char *host = NULL; @@ -620,7 +676,8 @@ parse_peer_options_v1(int call_type, xmlNode * request, } op = crm_element_value(request, F_CIB_OPERATION); - crm_trace("Processing %s request sent by %s", op, originator); + crm_trace("Processing legacy %s request sent by %s", op, originator); + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { /* Always process these */ *local_notify = FALSE; @@ -693,9 +750,9 @@ parse_peer_options_v1(int call_type, xmlNode * request, } static gboolean -parse_peer_options_v2(int call_type, xmlNode * request, - gboolean * 
local_notify, gboolean * needs_reply, gboolean * process, - gboolean * needs_forward) +parse_peer_options_v2(const cib__operation_t *operation, xmlNode *request, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process) { const char *host = NULL; const char *delegated = crm_element_value(request, F_CIB_DELEGATED); @@ -705,6 +762,10 @@ parse_peer_options_v2(int call_type, xmlNode * request, gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); + if (originator == NULL) { // Shouldn't be possible + originator = "peer"; + } + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) { /* sync_our_cib() sets F_CIB_ISREPLY */ if (reply_to) { @@ -734,10 +795,10 @@ parse_peer_options_v2(int call_type, xmlNode * request, const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); - crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", - op, (is_reply? " reply" : ""), + crm_trace("Parsing upgrade %s for %s with max=%s and upgrade_rc=%s", + (is_reply? "reply" : "request"), (based_is_primary? "primary" : "secondary"), - (max? max : "none"), (upgrade_rc? 
upgrade_rc : "none")); + pcmk__s(max, "none"), pcmk__s(upgrade_rc, "none")); if (upgrade_rc != NULL) { // Our upgrade request was rejected by DC, notify clients of result @@ -752,7 +813,7 @@ parse_peer_options_v2(int call_type, xmlNode * request, goto skip_is_reply; } else { - // Ignore broadcast client requests when we're not DC + // Ignore broadcast client requests when we're not primary return FALSE; } @@ -762,22 +823,25 @@ parse_peer_options_v2(int call_type, xmlNode * request, legacy_mode = TRUE; return FALSE; - } else if (is_reply && cib_op_modifies(call_type)) { + } else if (is_reply + && pcmk_is_set(operation->flags, cib__op_attr_modifies)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { - /* Legacy handling */ - crm_debug("Legacy handling of %s message from %s", op, originator); *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; + } else { // Not possible? 
+ crm_debug("Ignoring shutdown request from %s because reply_to=%s", + originator, reply_to); } return *process; } - if(is_reply) { - crm_trace("Handling %s reply sent from %s to local clients", op, originator); + if (is_reply) { + crm_trace("Will notify local clients for %s reply from %s", + op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; @@ -797,62 +861,78 @@ parse_peer_options_v2(int call_type, xmlNode * request, return TRUE; } else if (host != NULL) { - /* this is for a specific instance and we're not it */ - crm_trace("Ignoring %s operation for instance on %s", op, host); + crm_trace("Ignoring %s request intended for CIB manager on %s", + op, host); return FALSE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { *needs_reply = TRUE; } - crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, - crm_element_value(request, F_CIB_CLIENTNAME), - crm_element_value(request, F_CIB_CALLID), - originator, (*local_notify)?"(notify)":""); + crm_trace("Processing %s request broadcast by %s call %s on %s " + "(local clients will%s be notified)", op, + pcmk__s(crm_element_value(request, F_CIB_CLIENTNAME), "client"), + pcmk__s(crm_element_value(request, F_CIB_CALLID), "without ID"), + originator, (*local_notify? 
"" : "not")); return TRUE; } static gboolean -parse_peer_options(int call_type, xmlNode * request, - gboolean * local_notify, gboolean * needs_reply, gboolean * process, - gboolean * needs_forward) +parse_peer_options(const cib__operation_t *operation, xmlNode *request, + gboolean *local_notify, gboolean *needs_reply, + gboolean *process) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { - return parse_peer_options_v1( - call_type, request, local_notify, needs_reply, process, needs_forward); + return parse_peer_options_v1(operation, request, local_notify, + needs_reply, process); } else { - return parse_peer_options_v2( - call_type, request, local_notify, needs_reply, process, needs_forward); + return parse_peer_options_v2(operation, request, local_notify, + needs_reply, process); } } +/*! + * \internal + * \brief Forward a CIB request to the appropriate target host(s) + * + * \param[in] request CIB request to forward + */ static void -forward_request(xmlNode *request, int call_options) +forward_request(xmlNode *request) { const char *op = crm_element_value(request, F_CIB_OPERATION); + const char *section = crm_element_value(request, F_CIB_SECTION); const char *host = crm_element_value(request, F_CIB_HOST); + const char *originator = crm_element_value(request, F_ORIG); + const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); + const char *call_id = crm_element_value(request, F_CIB_CALLID); - crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); - - if (host != NULL) { - crm_trace("Forwarding %s op to %s", op, host); - send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); + int log_level = LOG_INFO; - } else { - crm_trace("Forwarding %s op to primary instance", op); - send_cluster_message(NULL, crm_msg_cib, request, FALSE); + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { + 
log_level = LOG_DEBUG; } - /* Return the request to its original state */ - xml_remove_prop(request, F_CIB_DELEGATED); + do_crm_log(log_level, + "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", + pcmk__s(op, "invalid"), + pcmk__s(section, "all"), + pcmk__s(host, (cib_legacy_mode()? "primary" : "all")), + pcmk__s(originator, "local"), + pcmk__s(client_name, "unspecified"), + pcmk__s(call_id, "unspecified")); - if (call_options & cib_discard_reply) { - crm_trace("Client not interested in reply"); - } + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + + send_cluster_message(((host != NULL)? crm_get_peer(0, host) : NULL), + crm_msg_cib, request, FALSE); + + // Return the request to its original state + xml_remove_prop(request, F_CIB_DELEGATED); } static gboolean @@ -861,9 +941,10 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb CRM_ASSERT(msg != NULL); if (broadcast) { - /* this (successful) call modified the CIB _and_ the - * change needs to be broadcast... - * send via HA to other nodes + /* @COMPAT: Legacy code + * + * This successful call modified the CIB, and the change needs to be + * broadcast (sent via cluster to all nodes). 
*/ int diff_add_updates = 0; int diff_add_epoch = 0; @@ -878,7 +959,7 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); - crm_element_value_int(result_diff, "format", &format); + crm_element_value_int(result_diff, PCMK_XA_FORMAT, &format); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, @@ -919,12 +1000,14 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb * \param[in] privileged Whether privileged commands may be run * (see cib_server_ops[] definition) * \param[in] cib_client IPC client that sent request (or NULL if CPG) + * + * \return Legacy Pacemaker return code */ -static void +int cib_process_request(xmlNode *request, gboolean privileged, const pcmk__client_t *cib_client) { - int call_type = 0; + // @TODO: Break into multiple smaller functions int call_options = 0; gboolean process = TRUE; // Whether to process request locally now @@ -946,12 +1029,16 @@ cib_process_request(xmlNode *request, gboolean privileged, const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); + const cib__operation_t *operation = NULL; + cib__op_fn_t op_function = NULL; + crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if ((host != NULL) && (*host == '\0')) { host = NULL; } + // @TODO: Improve trace messages. Target is accurate only for legacy mode. if (host) { target = host; @@ -970,72 +1057,68 @@ cib_process_request(xmlNode *request, gboolean privileged, crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } - rc = cib_get_operation_id(op, &call_type); + rc = cib__get_operation(op, &operation); + rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { /* TODO: construct error reply? 
*/ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); - return; + return rc; + } + + op_function = based_get_op_function(operation); + if (op_function == NULL) { + crm_err("Operation %s not supported by CIB manager", op); + return -EOPNOTSUPP; } if (cib_client != NULL) { - parse_local_options(cib_client, call_type, call_options, host, op, - &local_notify, &needs_reply, &process, &needs_forward); + parse_local_options(cib_client, operation, call_options, host, op, + &local_notify, &needs_reply, &process, + &needs_forward); - } else if (parse_peer_options(call_type, request, &local_notify, - &needs_reply, &process, &needs_forward) == FALSE) { - return; + } else if (!parse_peer_options(operation, request, &local_notify, + &needs_reply, &process)) { + return rc; + } + + if (pcmk_is_set(call_options, cib_transaction)) { + /* All requests in a transaction are processed locally against a working + * CIB copy, and we don't notify for individual requests because the + * entire transaction is atomic. + * + * We still call the option parser functions above, for the sake of log + * messages and checking whether we're the target for peer requests. + */ + process = TRUE; + needs_reply = FALSE; + local_notify = FALSE; + needs_forward = FALSE; } - is_update = cib_op_modifies(call_type); + is_update = pcmk_is_set(operation->flags, cib__op_attr_modifies); - if (call_options & cib_discard_reply) { + if (pcmk_is_set(call_options, cib_discard_reply)) { /* If the request will modify the CIB, and we are in legacy mode, we * need to build a reply so we can broadcast a diff, even if the * requester doesn't want one. 
*/ needs_reply = is_update && cib_legacy_mode(); local_notify = FALSE; + crm_trace("Client is not interested in the reply"); } if (needs_forward) { - const char *section = crm_element_value(request, F_CIB_SECTION); - int log_level = LOG_INFO; - - if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { - log_level = LOG_DEBUG; - } - - do_crm_log(log_level, - "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", - op, - section ? section : "'all'", - pcmk__s(host, (cib_legacy_mode() ? "primary" : "all")), - originator ? originator : "local", - client_name, call_id); - - forward_request(request, call_options); - return; + forward_request(request); + return rc; } if (cib_status != pcmk_ok) { - const char *call = crm_element_value(request, F_CIB_CALLID); - rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); - op_reply = create_xml_node(NULL, "cib-reply"); - crm_xml_add(op_reply, F_TYPE, T_CIB); - crm_xml_add(op_reply, F_CIB_OPERATION, op); - crm_xml_add(op_reply, F_CIB_CALLID, call); - crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); - crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); - crm_xml_add_int(op_reply, F_CIB_RC, rc); - - crm_trace("Attaching reply output"); - add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); - - crm_log_xml_explicit(op_reply, "cib:reply"); + op_reply = create_cib_reply(op, call_id, client_id, call_options, rc, + the_cib); } else if (process) { time_t finished = 0; @@ -1043,7 +1126,8 @@ cib_process_request(xmlNode *request, gboolean privileged, int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); - rc = cib_process_command(request, &op_reply, &result_diff, privileged); + rc = cib_process_command(request, operation, op_function, &op_reply, + &result_diff, privileged); if (!is_update) { level = LOG_TRACE; @@ -1120,10 +1204,9 @@ cib_process_request(xmlNode *request, gboolean privileged, op_reply 
= NULL; /* the reply is queued, so don't free here */ } - } else if (call_options & cib_discard_reply) { - crm_trace("Caller isn't interested in reply"); + } else if ((cib_client == NULL) + && !pcmk_is_set(call_options, cib_discard_reply)) { - } else if (cib_client == NULL) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); @@ -1158,24 +1241,51 @@ cib_process_request(xmlNode *request, gboolean privileged, free_xml(op_reply); free_xml(result_diff); - return; + return rc; } -static char * -calculate_section_digest(const char *xpath, xmlNode * xml_obj) +/*! + * \internal + * \brief Get a CIB operation's input from the request XML + * + * \param[in] request CIB request XML + * \param[in] type CIB operation type + * \param[out] section Where to store CIB section name + * + * \return Input XML for CIB operation + * + * \note If not \c NULL, the return value is a non-const pointer to part of + * \p request. The caller should not free it directly. + */ +static xmlNode * +prepare_input(const xmlNode *request, enum cib__op_type type, + const char **section) { - xmlNode *xml_section = NULL; + xmlNode *input = NULL; + + *section = NULL; + + switch (type) { + case cib__op_apply_patch: + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { + input = get_message_xml(request, F_CIB_UPDATE_DIFF); + } else { + input = get_message_xml(request, F_CIB_CALLDATA); + } + break; - if (xml_obj == NULL) { - return NULL; + default: + input = get_message_xml(request, F_CIB_CALLDATA); + *section = crm_element_value(request, F_CIB_SECTION); + break; } - xml_section = get_xpath_object(xpath, xml_obj, LOG_TRACE); - if (xml_section == NULL) { - return NULL; + // Grab the specified section + if ((*section != NULL) && pcmk__xe_is(input, XML_TAG_CIB)) { + input = pcmk_find_cib_element(input, *section); } - return calculate_xml_versioned_digest(xml_section, FALSE, TRUE, CRM_FEATURE_SET); + return input; } // v1 and v2 patch formats @@ -1201,14 
+1311,14 @@ contains_config_change(xmlNode *diff) } static int -cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) +cib_process_command(xmlNode *request, const cib__operation_t *operation, + cib__op_fn_t op_function, xmlNode **reply, + xmlNode **cib_diff, bool privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; - xmlNode *current_cib = NULL; - int call_type = 0; int call_options = 0; const char *op = NULL; @@ -1216,24 +1326,15 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); - const char *origin = crm_element_value(request, F_ORIG); + const char *originator = crm_element_value(request, F_ORIG); int rc = pcmk_ok; - int rc2 = pcmk_ok; - gboolean send_r_notify = FALSE; - gboolean config_changed = FALSE; - gboolean manage_counters = TRUE; + bool config_changed = false; + bool manage_counters = true; static mainloop_timer_t *digest_timer = NULL; - char *current_nodes_digest = NULL; - char *current_alerts_digest = NULL; - char *current_status_digest = NULL; - uint32_t change_section = cib_change_section_nodes - |cib_change_section_alerts - |cib_change_section_status; - CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { @@ -1242,91 +1343,64 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb *reply = NULL; *cib_diff = NULL; - current_cib = the_cib; /* Start processing the request... 
*/ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); - rc = cib_get_operation_id(op, &call_type); - if (rc == pcmk_ok && privileged == FALSE) { - rc = cib_op_can_run(call_type, call_options, privileged); + if (!privileged && pcmk_is_set(operation->flags, cib__op_attr_privileged)) { + rc = -EACCES; + crm_trace("Failed due to lack of privileges: %s", pcmk_strerror(rc)); + goto done; } - rc2 = cib_op_prepare(call_type, request, &input, &section); - if (rc == pcmk_ok) { - rc = rc2; - } + input = prepare_input(request, operation->type, &section); - if (rc != pcmk_ok) { - crm_trace("Call setup failed: %s", pcmk_strerror(rc)); - goto done; - - } else if (cib_op_modifies(call_type) == FALSE) { - rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, - section, request, input, FALSE, &config_changed, - current_cib, &result_cib, NULL, &output); + if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { + rc = cib_perform_op(op, call_options, op_function, true, section, + request, input, false, &config_changed, &the_cib, + &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } - /* Handle a valid write action */ + /* @COMPAT: Handle a valid write action (legacy) + * + * @TODO: Re-evaluate whether this is all truly legacy. The cib_force_diff + * portion is. However, F_CIB_GLOBAL_UPDATE may be set by a sync operation + * even in non-legacy mode, and manage_counters tells xml_create_patchset() + * whether to update version/epoch info. 
+ */ if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { - /* legacy code */ - manage_counters = FALSE; + manage_counters = false; cib__set_call_options(call_options, "call", cib_force_diff); crm_trace("Global update detected"); - CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); - crm_log_xml_err(request, "bad op")); + CRM_LOG_ASSERT(pcmk__str_any_of(op, + PCMK__CIB_REQUEST_APPLY_PATCH, + PCMK__CIB_REQUEST_REPLACE, + NULL)); } ping_modified_since = TRUE; if (pcmk_is_set(call_options, cib_inhibit_bcast)) { crm_trace("Skipping update: inhibit broadcast"); - manage_counters = FALSE; - } - - if (!pcmk_is_set(call_options, cib_dryrun) - && pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) { - // Copying large CIBs accounts for a huge percentage of our CIB usage - cib__set_call_options(call_options, "call", cib_zero_copy); - } else { - cib__clear_call_options(call_options, "call", cib_zero_copy); - } - -#define XPATH_CONFIG "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION -#define XPATH_NODES XPATH_CONFIG "/" XML_CIB_TAG_NODES -#define XPATH_ALERTS XPATH_CONFIG "/" XML_CIB_TAG_ALERTS -#define XPATH_STATUS "//" XML_TAG_CIB "/" XML_CIB_TAG_STATUS - - // Calculate the hash value of the section before the change - if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) { - current_nodes_digest = calculate_section_digest(XPATH_NODES, - current_cib); - current_alerts_digest = calculate_section_digest(XPATH_ALERTS, - current_cib); - current_status_digest = calculate_section_digest(XPATH_STATUS, - current_cib); - crm_trace("current-digest %s:%s:%s", current_nodes_digest, - current_alerts_digest, current_status_digest); + manage_counters = false; } // result_cib must not be modified after cib_perform_op() returns - rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, - section, request, input, manage_counters, - &config_changed, current_cib, &result_cib, cib_diff, - &output); + rc = cib_perform_op(op, 
call_options, op_function, false, section, + request, input, manage_counters, &config_changed, + &the_cib, &result_cib, cib_diff, &output); + // @COMPAT: Legacy code if (!manage_counters) { int format = 1; - /* Legacy code - * If the diff is NULL at this point, it's because nothing changed - */ + // If the diff is NULL at this point, it's because nothing changed if (*cib_diff != NULL) { - crm_element_value_int(*cib_diff, "format", &format); + crm_element_value_int(*cib_diff, PCMK_XA_FORMAT, &format); } if (format == 1) { @@ -1334,92 +1408,60 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb } } - /* Always write to disk for successful replace and upgrade ops. This also + /* Always write to disk for successful ops with the flag set. This also * negates the need to detect ordering changes. */ if ((rc == pcmk_ok) - && pcmk__str_any_of(op, - PCMK__CIB_REQUEST_REPLACE, - PCMK__CIB_REQUEST_UPGRADE, - NULL)) { - config_changed = TRUE; - } - - if (rc == pcmk_ok && !pcmk_is_set(call_options, cib_dryrun)) { - crm_trace("Activating %s->%s%s%s", - crm_element_value(current_cib, XML_ATTR_NUMUPDATES), - crm_element_value(result_cib, XML_ATTR_NUMUPDATES), - (pcmk_is_set(call_options, cib_zero_copy)? " zero-copy" : ""), - (config_changed? " changed" : "")); - if (!pcmk_is_set(call_options, cib_zero_copy)) { - rc = activateCibXml(result_cib, config_changed, op); - crm_trace("Activated %s (%d)", - crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc); - } + && pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { - if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { - cib_read_config(config_hash, result_cib); - } + config_changed = true; + } - if (pcmk__str_eq(PCMK__CIB_REQUEST_REPLACE, op, pcmk__str_none)) { - char *result_nodes_digest = NULL; - char *result_alerts_digest = NULL; - char *result_status_digest = NULL; - - /* Calculate the hash value of the changed section. 
*/ - result_nodes_digest = calculate_section_digest(XPATH_NODES, - result_cib); - result_alerts_digest = calculate_section_digest(XPATH_ALERTS, - result_cib); - result_status_digest = calculate_section_digest(XPATH_STATUS, - result_cib); - crm_trace("result-digest %s:%s:%s", result_nodes_digest, - result_alerts_digest, result_status_digest); - - if (pcmk__str_eq(current_nodes_digest, result_nodes_digest, - pcmk__str_none)) { - change_section = - pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, - "CIB change section", - "change_section", change_section, - cib_change_section_nodes, "nodes"); - } + if ((rc == pcmk_ok) + && !pcmk_any_flags_set(call_options, cib_dryrun|cib_transaction)) { - if (pcmk__str_eq(current_alerts_digest, result_alerts_digest, - pcmk__str_none)) { - change_section = - pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, - "CIB change section", - "change_section", change_section, - cib_change_section_alerts, "alerts"); + if (result_cib != the_cib) { + if (pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { + config_changed = true; } - if (pcmk__str_eq(current_status_digest, result_status_digest, - pcmk__str_none)) { - change_section = - pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, - "CIB change section", - "change_section", change_section, - cib_change_section_status, "status"); - } + crm_trace("Activating %s->%s%s", + crm_element_value(the_cib, XML_ATTR_NUMUPDATES), + crm_element_value(result_cib, XML_ATTR_NUMUPDATES), + (config_changed? 
" changed" : "")); - if (change_section != cib_change_section_none) { - send_r_notify = TRUE; + rc = activateCibXml(result_cib, config_changed, op); + if (rc != pcmk_ok) { + crm_err("Failed to activate new CIB: %s", pcmk_strerror(rc)); } - - free(result_nodes_digest); - free(result_alerts_digest); - free(result_status_digest); + } + + if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { + cib_read_config(config_hash, result_cib); + } - } else if (pcmk__str_eq(PCMK__CIB_REQUEST_ERASE, op, pcmk__str_none)) { - send_r_notify = TRUE; + /* @COMPAT Nodes older than feature set 3.19.0 don't support + * transactions. In a mixed-version cluster with nodes <3.19.0, we must + * sync the updated CIB, so that the older nodes receive the changes. + * Any node that has already applied the transaction will ignore the + * synced CIB. + * + * To ensure the updated CIB is synced from only one node, we sync it + * from the originator. + */ + if ((operation->type == cib__op_commit_transact) + && pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei) + && compare_version(crm_element_value(the_cib, XML_ATTR_CRM_VERSION), + "3.19.0") < 0) { + + sync_our_cib(request, TRUE); } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { - CRM_ASSERT(!pcmk_is_set(call_options, cib_zero_copy)); + CRM_ASSERT(result_cib != the_cib); if (output != NULL) { crm_log_xml_info(output, "cib:output"); @@ -1432,61 +1474,31 @@ cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gb crm_trace("Not activating %d %d %s", rc, pcmk_is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES)); - if (!pcmk_is_set(call_options, cib_zero_copy)) { + + if (result_cib != the_cib) { free_xml(result_cib); } } - if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) { + if (!pcmk_any_flags_set(call_options, + cib_dryrun|cib_inhibit_notify|cib_transaction)) { crm_trace("Sending notifications %d", 
pcmk_is_set(call_options, cib_dryrun)); - cib_diff_notify(op, rc, call_id, client_id, client_name, origin, input, - *cib_diff); + cib_diff_notify(op, rc, call_id, client_id, client_name, originator, + input, *cib_diff); } - if (send_r_notify) { - cib_replace_notify(op, rc, call_id, client_id, client_name, origin, - the_cib, *cib_diff, change_section); - } - - pcmk__output_set_log_level(logger_out, LOG_TRACE); - logger_out->message(logger_out, "xml-patchset", *cib_diff); + pcmk__log_xml_patchset(LOG_TRACE, *cib_diff); done: if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) { - const char *caller = crm_element_value(request, F_CIB_CLIENTID); - - *reply = create_xml_node(NULL, "cib-reply"); - crm_xml_add(*reply, F_TYPE, T_CIB); - crm_xml_add(*reply, F_CIB_OPERATION, op); - crm_xml_add(*reply, F_CIB_CALLID, call_id); - crm_xml_add(*reply, F_CIB_CLIENTID, caller); - crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); - crm_xml_add_int(*reply, F_CIB_RC, rc); - - if (output != NULL) { - crm_trace("Attaching reply output"); - add_message_xml(*reply, F_CIB_CALLDATA, output); - } - - crm_log_xml_explicit(*reply, "cib:reply"); + *reply = create_cib_reply(op, call_id, client_id, call_options, rc, + output); } - crm_trace("cleanup"); - - if (cib_op_modifies(call_type) == FALSE && output != current_cib) { + if (output != the_cib) { free_xml(output); - output = NULL; - } - - if (call_type >= 0) { - cib_op_cleanup(call_type, call_options, &input, &output); } - - free(current_nodes_digest); - free(current_alerts_digest); - free(current_status_digest); - crm_trace("done"); return rc; } @@ -1554,12 +1566,12 @@ initiate_exit(void) xmlNode *leaving = NULL; active = crm_active_peers(); - if (active < 2) { + if (active < 2) { // This is the last active node terminate_cib(__func__, 0); return; } - crm_info("Sending disconnect notification to %d peers...", active); + crm_info("Sending shutdown request to %d peers", active); leaving = create_xml_node(NULL, 
"exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); @@ -1664,12 +1676,6 @@ terminate_cib(const char *caller, int fast) uninitializeCib(); - if (logger_out != NULL) { - logger_out->finish(logger_out, CRM_EX_OK, true, NULL); - pcmk__output_free(logger_out); - logger_out = NULL; - } - if (fast > 0) { /* Quit fast on error */ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); diff --git a/daemons/based/based_common.c b/daemons/based/based_common.c deleted file mode 100644 index 7e68cf0..0000000 --- a/daemons/based/based_common.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright 2008-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU General Public License version 2 - * or later (GPLv2+) WITHOUT ANY WARRANTY. - */ - -#include <crm_internal.h> - -#include <sys/param.h> -#include <stdio.h> -#include <sys/types.h> -#include <unistd.h> - -#include <stdlib.h> -#include <errno.h> -#include <fcntl.h> - -#include <crm/crm.h> -#include <crm/cib.h> -#include <crm/msg_xml.h> -#include <crm/common/ipc.h> -#include <crm/cluster.h> - -#include <crm/common/xml.h> - -#include <pacemaker-based.h> - -gboolean stand_alone = FALSE; - -extern int cib_perform_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, - gboolean privileged); - -static xmlNode * -cib_prepare_common(xmlNode * root, const char *section) -{ - xmlNode *data = NULL; - - /* extract the CIB from the fragment */ - if (root == NULL) { - return NULL; - - } else if (pcmk__strcase_any_of(crm_element_name(root), XML_TAG_FRAGMENT, - F_CRM_DATA, F_CIB_CALLDATA, NULL)) { - data = first_named_child(root, XML_TAG_CIB); - - } else { - data = root; - } - - /* grab the section specified for the command */ - if (section != NULL && data != NULL && pcmk__str_eq(crm_element_name(data), XML_TAG_CIB, pcmk__str_none)) { - data = pcmk_find_cib_element(data, section); - } - - /* crm_log_xml_trace(root, 
"cib:input"); */ - return data; -} - -static int -cib_prepare_none(xmlNode * request, xmlNode ** data, const char **section) -{ - *data = NULL; - *section = crm_element_value(request, F_CIB_SECTION); - return pcmk_ok; -} - -static int -cib_prepare_data(xmlNode * request, xmlNode ** data, const char **section) -{ - xmlNode *input_fragment = get_message_xml(request, F_CIB_CALLDATA); - - *section = crm_element_value(request, F_CIB_SECTION); - *data = cib_prepare_common(input_fragment, *section); - /* crm_log_xml_debug(*data, "data"); */ - return pcmk_ok; -} - -static int -cib_prepare_sync(xmlNode * request, xmlNode ** data, const char **section) -{ - *data = NULL; - *section = crm_element_value(request, F_CIB_SECTION); - return pcmk_ok; -} - -static int -cib_prepare_diff(xmlNode * request, xmlNode ** data, const char **section) -{ - xmlNode *input_fragment = NULL; - - *data = NULL; - *section = NULL; - - if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { - input_fragment = get_message_xml(request, F_CIB_UPDATE_DIFF); - } else { - input_fragment = get_message_xml(request, F_CIB_CALLDATA); - } - - CRM_CHECK(input_fragment != NULL, crm_log_xml_warn(request, "no input")); - *data = cib_prepare_common(input_fragment, NULL); - return pcmk_ok; -} - -static int -cib_cleanup_query(int options, xmlNode ** data, xmlNode ** output) -{ - CRM_LOG_ASSERT(*data == NULL); - if ((options & cib_no_children) - || pcmk__str_eq(crm_element_name(*output), "xpath-query", pcmk__str_casei)) { - free_xml(*output); - } - return pcmk_ok; -} - -static int -cib_cleanup_data(int options, xmlNode ** data, xmlNode ** output) -{ - free_xml(*output); - *data = NULL; - return pcmk_ok; -} - -static int -cib_cleanup_output(int options, xmlNode ** data, xmlNode ** output) -{ - free_xml(*output); - return pcmk_ok; -} - -static int -cib_cleanup_none(int options, xmlNode ** data, xmlNode ** output) -{ - CRM_LOG_ASSERT(*data == NULL); - CRM_LOG_ASSERT(*output == NULL); - return pcmk_ok; -} - -static 
cib_operation_t cib_server_ops[] = { - // Booleans are modifies_cib, needs_privileges - { - NULL, FALSE, FALSE, - cib_prepare_none, cib_cleanup_none, cib_process_default - }, - { - PCMK__CIB_REQUEST_QUERY, FALSE, FALSE, - cib_prepare_none, cib_cleanup_query, cib_process_query - }, - { - PCMK__CIB_REQUEST_MODIFY, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_modify - }, - { - PCMK__CIB_REQUEST_APPLY_PATCH, TRUE, TRUE, - cib_prepare_diff, cib_cleanup_data, cib_server_process_diff - }, - { - PCMK__CIB_REQUEST_REPLACE, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_replace_svr - }, - { - PCMK__CIB_REQUEST_CREATE, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_create - }, - { - PCMK__CIB_REQUEST_DELETE, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_delete - }, - { - PCMK__CIB_REQUEST_SYNC_TO_ALL, FALSE, TRUE, - cib_prepare_sync, cib_cleanup_none, cib_process_sync - }, - { - PCMK__CIB_REQUEST_BUMP, TRUE, TRUE, - cib_prepare_none, cib_cleanup_output, cib_process_bump - }, - { - PCMK__CIB_REQUEST_ERASE, TRUE, TRUE, - cib_prepare_none, cib_cleanup_output, cib_process_erase - }, - { - PCMK__CIB_REQUEST_NOOP, FALSE, FALSE, - cib_prepare_none, cib_cleanup_none, cib_process_default - }, - { - PCMK__CIB_REQUEST_ABS_DELETE, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_delete_absolute - }, - { - PCMK__CIB_REQUEST_UPGRADE, TRUE, TRUE, - cib_prepare_none, cib_cleanup_output, cib_process_upgrade_server - }, - { - PCMK__CIB_REQUEST_SECONDARY, FALSE, TRUE, - cib_prepare_none, cib_cleanup_none, cib_process_readwrite - }, - { - PCMK__CIB_REQUEST_ALL_SECONDARY, FALSE, TRUE, - cib_prepare_none, cib_cleanup_none, cib_process_readwrite - }, - { - PCMK__CIB_REQUEST_SYNC_TO_ONE, FALSE, TRUE, - cib_prepare_sync, cib_cleanup_none, cib_process_sync_one - }, - { - PCMK__CIB_REQUEST_PRIMARY, TRUE, TRUE, - cib_prepare_data, cib_cleanup_data, cib_process_readwrite - }, - { - PCMK__CIB_REQUEST_IS_PRIMARY, FALSE, TRUE, - 
cib_prepare_none, cib_cleanup_none, cib_process_readwrite - }, - { - PCMK__CIB_REQUEST_SHUTDOWN, FALSE, TRUE, - cib_prepare_sync, cib_cleanup_none, cib_process_shutdown_req - }, - { - CRM_OP_PING, FALSE, FALSE, - cib_prepare_none, cib_cleanup_output, cib_process_ping - }, -}; - -int -cib_get_operation_id(const char *op, int *operation) -{ - static GHashTable *operation_hash = NULL; - - if (operation_hash == NULL) { - int lpc = 0; - int max_msg_types = PCMK__NELEM(cib_server_ops); - - operation_hash = pcmk__strkey_table(NULL, free); - for (lpc = 1; lpc < max_msg_types; lpc++) { - int *value = malloc(sizeof(int)); - - if(value) { - *value = lpc; - g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); - } - } - } - - if (op != NULL) { - int *value = g_hash_table_lookup(operation_hash, op); - - if (value) { - *operation = *value; - return pcmk_ok; - } - } - crm_err("Operation %s is not valid", op); - *operation = -1; - return -EINVAL; -} - -xmlNode * -cib_msg_copy(xmlNode * msg, gboolean with_data) -{ - int lpc = 0; - const char *field = NULL; - const char *value = NULL; - xmlNode *value_struct = NULL; - - static const char *field_list[] = { - F_XML_TAGNAME, - F_TYPE, - F_CIB_CLIENTID, - F_CIB_CALLOPTS, - F_CIB_CALLID, - F_CIB_OPERATION, - F_CIB_ISREPLY, - F_CIB_SECTION, - F_CIB_HOST, - F_CIB_RC, - F_CIB_DELEGATED, - F_CIB_OBJID, - F_CIB_OBJTYPE, - F_CIB_EXISTING, - F_CIB_SEENCOUNT, - F_CIB_TIMEOUT, - F_CIB_GLOBAL_UPDATE, - F_CIB_CLIENTNAME, - F_CIB_USER, - F_CIB_NOTIFY_TYPE, - F_CIB_NOTIFY_ACTIVATE - }; - - static const char *data_list[] = { - F_CIB_CALLDATA, - F_CIB_UPDATE, - F_CIB_UPDATE_RESULT - }; - - xmlNode *copy = create_xml_node(NULL, "copy"); - - CRM_ASSERT(copy != NULL); - - for (lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) { - field = field_list[lpc]; - value = crm_element_value(msg, field); - if (value != NULL) { - crm_xml_add(copy, field, value); - } - } - for (lpc = 0; with_data && lpc < PCMK__NELEM(data_list); lpc++) { 
- field = data_list[lpc]; - value_struct = get_message_xml(msg, field); - if (value_struct != NULL) { - add_message_xml(copy, field, value_struct); - } - } - - return copy; -} - -cib_op_t * -cib_op_func(int call_type) -{ - return &(cib_server_ops[call_type].fn); -} - -gboolean -cib_op_modifies(int call_type) -{ - return cib_server_ops[call_type].modifies_cib; -} - -int -cib_op_can_run(int call_type, int call_options, bool privileged) -{ - if (!privileged && cib_server_ops[call_type].needs_privileges) { - return -EACCES; - } - return pcmk_ok; -} - -int -cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section) -{ - crm_trace("Prepare %d", call_type); - return cib_server_ops[call_type].prepare(request, input, section); -} - -int -cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output) -{ - crm_trace("Cleanup %d", call_type); - return cib_server_ops[call_type].cleanup(options, input, output); -} diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c index fc34f39..f252ac1 100644 --- a/daemons/based/based_io.c +++ b/daemons/based/based_io.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -22,6 +22,9 @@ #include <sys/wait.h> #include <sys/stat.h> +#include <glib.h> +#include <libxml/tree.h> + #include <crm/crm.h> #include <crm/cib.h> @@ -45,12 +48,15 @@ cib_rename(const char *old) umask(S_IWGRP | S_IWOTH | S_IROTH); new_fd = mkstemp(new); - crm_err("Archiving unusable file %s as %s", old, new); + if ((new_fd < 0) || (rename(old, new) < 0)) { - crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new); - crm_err("Disabling disk writes and continuing"); + crm_err("Couldn't archive unusable file %s (disabling disk writes and continuing)", + old); cib_writes_enabled = FALSE; + } else { + crm_err("Archived unusable file %s as %s", old, new); } + if (new_fd > 0) { close(new_fd); } @@ -107,7 +113,7 @@ static int cib_archive_filter(const struct dirent * a) if(stat(a_path, &s) != 0) { rc = errno; - crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_strerror(rc), rc); + crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_rc_str(rc), rc); rc = 0; } else if ((s.st_mode & S_IFREG) != S_IFREG) { @@ -189,7 +195,7 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status) const char *name = NULL; const char *value = NULL; const char *validation = NULL; - const char *use_valgrind = getenv("PCMK_valgrind_enabled"); + const char *use_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); xmlNode *root = NULL; xmlNode *status = NULL; @@ -214,7 +220,7 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status) crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root); lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); if (lpc < 0) { - crm_perror(LOG_NOTICE, "scandir(%s) failed", cib_root); + crm_err("scandir(%s) failed: %s", cib_root, pcmk_rc_str(errno)); } } @@ -418,7 +424,7 @@ write_cib_contents(gpointer p) pid = fork(); if (pid < 0) { - crm_perror(LOG_ERR, "Disabling disk writes after fork failure"); + crm_err("Disabling disk writes after fork failure: %s", 
pcmk_rc_str(errno)); cib_writes_enabled = FALSE; return FALSE; } diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c index d46456c..35d639a 100644 --- a/daemons/based/based_messages.c +++ b/daemons/based/based_messages.c @@ -19,6 +19,9 @@ #include <sys/param.h> #include <sys/types.h> +#include <glib.h> +#include <libxml/tree.h> + #include <crm/crm.h> #include <crm/cib/internal.h> #include <crm/msg_xml.h> @@ -61,25 +64,15 @@ cib_process_shutdown_req(const char *op, int options, const char *section, xmlNo return pcmk_ok; } +// @COMPAT: Remove when PCMK__CIB_REQUEST_NOOP is removed int -cib_process_default(const char *op, int options, const char *section, xmlNode * req, - xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, - xmlNode ** answer) +cib_process_noop(const char *op, int options, const char *section, xmlNode *req, + xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, + xmlNode **answer) { - int result = pcmk_ok; - crm_trace("Processing \"%s\" event", op); *answer = NULL; - - if (op == NULL) { - result = -EINVAL; - crm_err("No operation specified"); - - } else if (strcmp(PCMK__CIB_REQUEST_NOOP, op) != 0) { - result = -EPROTONOSUPPORT; - crm_err("Action [%s] is not supported by the CIB manager", op); - } - return result; + return pcmk_ok; } int @@ -158,10 +151,10 @@ cib_process_ping(const char *op, int options, const char *section, xmlNode * req // Append additional detail so the receiver can log the differences add_message_xml(*answer, F_CIB_CALLDATA, the_cib); }, - { + if (the_cib != NULL) { // Always include at least the version details - const char *tag = TYPE(the_cib); - xmlNode *shallow = create_xml_node(NULL, tag); + xmlNode *shallow = create_xml_node(NULL, + (const char *) the_cib->name); copy_in_properties(shallow, the_cib); add_message_xml(*answer, F_CIB_CALLDATA, shallow); @@ -250,7 +243,7 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml if (rc != pcmk_ok) { // Notify 
originating peer so it can notify its local clients - crm_node_t *origin = pcmk__search_cluster_node_cache(0, host); + crm_node_t *origin = pcmk__search_cluster_node_cache(0, host, NULL); crm_info("Rejecting upgrade request from %s: %s " CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, @@ -341,8 +334,7 @@ cib_server_process_diff(const char *op, int options, const char *section, xmlNod crm_warn("Requesting full CIB refresh because update failed: %s" CRM_XS " rc=%d", pcmk_strerror(rc), rc); - pcmk__output_set_log_level(logger_out, LOG_INFO); - logger_out->message(logger_out, "xml-patchset", input); + pcmk__log_xml_patchset(LOG_INFO, input); free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); @@ -356,15 +348,16 @@ cib_process_replace_svr(const char *op, int options, const char *section, xmlNod xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { - const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); - if (rc == pcmk_ok && pcmk__str_eq(tag, XML_TAG_CIB, pcmk__str_casei)) { + + if ((rc == pcmk_ok) && pcmk__xe_is(input, XML_TAG_CIB)) { sync_in_progress = 0; } return rc; } +// @COMPAT: Remove when PCMK__CIB_REQUEST_ABS_DELETE is removed int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, @@ -373,6 +366,49 @@ cib_process_delete_absolute(const char *op, int options, const char *section, xm return -EINVAL; } +static xmlNode * +cib_msg_copy(xmlNode *msg) +{ + static const char *field_list[] = { + F_XML_TAGNAME, + F_TYPE, + F_CIB_CLIENTID, + F_CIB_CALLOPTS, + F_CIB_CALLID, + F_CIB_OPERATION, + F_CIB_ISREPLY, + F_CIB_SECTION, + F_CIB_HOST, + F_CIB_RC, + F_CIB_DELEGATED, + F_CIB_OBJID, + F_CIB_OBJTYPE, + F_CIB_EXISTING, + F_CIB_SEENCOUNT, + F_CIB_TIMEOUT, + F_CIB_GLOBAL_UPDATE, + F_CIB_CLIENTNAME, + F_CIB_USER, + F_CIB_NOTIFY_TYPE, + 
F_CIB_NOTIFY_ACTIVATE + }; + + xmlNode *copy = create_xml_node(NULL, "copy"); + + CRM_ASSERT(copy != NULL); + + for (int lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) { + const char *field = field_list[lpc]; + const char *value = crm_element_value(msg, field); + + if (value != NULL) { + crm_xml_add(copy, field, value); + } + } + + return copy; +} + int sync_our_cib(xmlNode * request, gboolean all) { @@ -384,22 +420,12 @@ sync_our_cib(xmlNode * request, gboolean all) xmlNode *replace_request = NULL; CRM_CHECK(the_cib != NULL, return -EINVAL); - - replace_request = cib_msg_copy(request, FALSE); - CRM_CHECK(replace_request != NULL, return -EINVAL); + CRM_CHECK(all || (host != NULL), return -EINVAL); crm_debug("Syncing CIB to %s", all ? "all peers" : host); - if (all == FALSE && host == NULL) { - crm_log_xml_err(request, "bad sync"); - } - /* remove the "all == FALSE" condition - * - * sync_from was failing, the local client wasn't being notified - * because it didn't know it was a reply - * setting this does not prevent the other nodes from applying it - * if all == TRUE - */ + replace_request = cib_msg_copy(request); + if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } @@ -425,3 +451,30 @@ sync_our_cib(xmlNode * request, gboolean all) free(digest); return result; } + +int +cib_process_commit_transaction(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, + xmlNode *existing_cib, xmlNode **result_cib, + xmlNode **answer) +{ + /* On success, our caller will activate *result_cib locally, trigger a + * replace notification if appropriate, and sync *result_cib to all nodes. + * On failure, our caller will free *result_cib. 
+ */ + int rc = pcmk_rc_ok; + const char *client_id = crm_element_value(req, F_CIB_CLIENTID); + const char *origin = crm_element_value(req, F_ORIG); + pcmk__client_t *client = pcmk__find_client_by_id(client_id); + + rc = based_commit_transaction(input, client, origin, result_cib); + + if (rc != pcmk_rc_ok) { + char *source = based_transaction_source_str(client, origin); + + crm_err("Could not commit transaction for %s: %s", + source, pcmk_rc_str(rc)); + free(source); + } + return pcmk_rc2legacy(rc); +} diff --git a/daemons/based/based_notify.c b/daemons/based/based_notify.c index 5881f6d..00a4c54 100644 --- a/daemons/based/based_notify.c +++ b/daemons/based/based_notify.c @@ -21,6 +21,9 @@ #include <time.h> +#include <glib.h> +#include <libxml/tree.h> + #include <crm/crm.h> #include <crm/cib/internal.h> #include <crm/msg_xml.h> @@ -30,7 +33,7 @@ #include <pacemaker-based.h> struct cib_notification_s { - xmlNode *msg; + const xmlNode *msg; struct iovec *iov; int32_t iov_size; }; @@ -58,10 +61,6 @@ cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) do_send = TRUE; - } else if (pcmk_is_set(client->flags, cib_notify_replace) - && pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { - do_send = TRUE; - } else if (pcmk_is_set(client->flags, cib_notify_confirm) && pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) { do_send = TRUE; @@ -104,7 +103,7 @@ cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) } static void -cib_notify_send(xmlNode * xml) +cib_notify_send(const xmlNode *xml) { struct iovec *iov; struct cib_notification_s update; @@ -198,15 +197,16 @@ cib_diff_notify(const char *op, int result, const char *call_id, crm_xml_add(update_msg, F_SUBTYPE, T_CIB_DIFF_NOTIFY); crm_xml_add(update_msg, F_CIB_OPERATION, op); crm_xml_add(update_msg, F_CIB_CLIENTID, client_id); + crm_xml_add(update_msg, F_CIB_CLIENTNAME, client_name); crm_xml_add(update_msg, F_CIB_CALLID, call_id); crm_xml_add(update_msg, F_ORIG, origin); 
crm_xml_add_int(update_msg, F_CIB_RC, result); if (update != NULL) { - type = crm_element_name(update); + type = (const char *) update->name; crm_trace("Setting type to update->name: %s", type); } else { - type = crm_element_name(diff); + type = (const char *) diff->name; crm_trace("Setting type to new_obj->name: %s", type); } crm_xml_add(update_msg, F_CIB_OBJID, ID(diff)); @@ -218,88 +218,7 @@ cib_diff_notify(const char *op, int result, const char *call_id, } add_message_xml(update_msg, F_CIB_UPDATE_RESULT, diff); + crm_log_xml_trace(update_msg, "diff-notify"); cib_notify_send(update_msg); free_xml(update_msg); } - -void -cib_replace_notify(const char *op, int result, const char *call_id, - const char *client_id, const char *client_name, - const char *origin, xmlNode *update, xmlNode *diff, - uint32_t change_section) -{ - xmlNode *replace_msg = NULL; - - int add_updates = 0; - int add_epoch = 0; - int add_admin_epoch = 0; - - int del_updates = 0; - int del_epoch = 0; - int del_admin_epoch = 0; - - uint8_t log_level = LOG_INFO; - - if (diff == NULL) { - return; - } - - if (result != pcmk_ok) { - log_level = LOG_WARNING; - } - - cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, - &del_admin_epoch, &del_epoch, &del_updates); - - if (del_updates < 0) { - crm_log_xml_debug(diff, "Bad replace diff"); - } - - if ((add_admin_epoch != del_admin_epoch) - || (add_epoch != del_epoch) - || (add_updates != del_updates)) { - - do_crm_log(log_level, - "Replaced CIB generation %d.%d.%d with %d.%d.%d from client " - "%s%s%s (%s) (%s)", - del_admin_epoch, del_epoch, del_updates, - add_admin_epoch, add_epoch, add_updates, - client_name, - ((call_id != NULL)? 
" call " : ""), pcmk__s(call_id, ""), - pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); - - } else if ((add_admin_epoch != 0) - || (add_epoch != 0) - || (add_updates != 0)) { - - do_crm_log(log_level, - "Local-only replace of CIB generation %d.%d.%d from client " - "%s%s%s (%s) (%s)", - add_admin_epoch, add_epoch, add_updates, - client_name, - ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), - pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); - } - - replace_msg = create_xml_node(NULL, "notify-replace"); - - crm_xml_add(replace_msg, F_TYPE, T_CIB_NOTIFY); - crm_xml_add(replace_msg, F_SUBTYPE, T_CIB_REPLACE_NOTIFY); - crm_xml_add(replace_msg, F_CIB_OPERATION, op); - crm_xml_add(replace_msg, F_CIB_CLIENTID, client_id); - crm_xml_add(replace_msg, F_CIB_CALLID, call_id); - crm_xml_add(replace_msg, F_ORIG, origin); - crm_xml_add_int(replace_msg, F_CIB_RC, result); - crm_xml_add_ll(replace_msg, F_CIB_CHANGE_SECTION, - (long long) change_section); - attach_cib_generation(replace_msg, "cib-replace-generation", update); - - /* We can include update and diff if a replace callback needs them. Until - * then, avoid the overhead. - */ - - crm_log_xml_trace(replace_msg, "CIB replaced"); - - cib_notify_send(replace_msg); - free_xml(replace_msg); -} diff --git a/daemons/based/based_operation.c b/daemons/based/based_operation.c new file mode 100644 index 0000000..736d425 --- /dev/null +++ b/daemons/based/based_operation.c @@ -0,0 +1,59 @@ +/* + * Copyright 2008-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <glib.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <pacemaker-based.h> + +static const cib__op_fn_t cib_op_functions[] = { + [cib__op_abs_delete] = cib_process_delete_absolute, + [cib__op_apply_patch] = cib_server_process_diff, + [cib__op_bump] = cib_process_bump, + [cib__op_commit_transact] = cib_process_commit_transaction, + [cib__op_create] = cib_process_create, + [cib__op_delete] = cib_process_delete, + [cib__op_erase] = cib_process_erase, + [cib__op_is_primary] = cib_process_readwrite, + [cib__op_modify] = cib_process_modify, + [cib__op_noop] = cib_process_noop, + [cib__op_ping] = cib_process_ping, + [cib__op_primary] = cib_process_readwrite, + [cib__op_query] = cib_process_query, + [cib__op_replace] = cib_process_replace_svr, + [cib__op_secondary] = cib_process_readwrite, + [cib__op_shutdown] = cib_process_shutdown_req, + [cib__op_sync_all] = cib_process_sync, + [cib__op_sync_one] = cib_process_sync_one, + [cib__op_upgrade] = cib_process_upgrade_server, +}; + +/*! + * \internal + * \brief Get the function that performs a given server-side CIB operation + * + * \param[in] operation Operation whose function to look up + * + * \return Function that performs \p operation within \c pacemaker-based + */ +cib__op_fn_t +based_get_op_function(const cib__operation_t *operation) +{ + enum cib__op_type type = operation->type; + + CRM_ASSERT(type >= 0); + + if (type >= PCMK__NELEM(cib_op_functions)) { + return NULL; + } + return cib_op_functions[type]; +} diff --git a/daemons/based/based_remote.c b/daemons/based/based_remote.c index 38136d2..4aa41fa 100644 --- a/daemons/based/based_remote.c +++ b/daemons/based/based_remote.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2021 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -23,7 +23,9 @@ #include <stdlib.h> #include <errno.h> + #include <glib.h> +#include <libxml/tree.h> #include <crm/msg_xml.h> #include <crm/common/ipc.h> @@ -126,13 +128,13 @@ init_remote_listener(int port, gboolean encrypted) /* create server socket */ ssock = malloc(sizeof(int)); if(ssock == NULL) { - crm_perror(LOG_ERR, "Listener socket allocation failed"); + crm_err("Listener socket allocation failed: %s", pcmk_rc_str(errno)); return -1; } *ssock = socket(AF_INET, SOCK_STREAM, 0); if (*ssock == -1) { - crm_perror(LOG_ERR, "Listener socket creation failed"); + crm_err("Listener socket creation failed: %s", pcmk_rc_str(errno)); free(ssock); return -1; } @@ -141,8 +143,8 @@ init_remote_listener(int port, gboolean encrypted) optval = 1; rc = setsockopt(*ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { - crm_perror(LOG_WARNING, - "Local address reuse not allowed on listener socket"); + crm_err("Local address reuse not allowed on listener socket: %s", + pcmk_rc_str(errno)); } /* bind server socket */ @@ -151,13 +153,13 @@ init_remote_listener(int port, gboolean encrypted) saddr.sin_addr.s_addr = INADDR_ANY; saddr.sin_port = htons(port); if (bind(*ssock, (struct sockaddr *)&saddr, sizeof(saddr)) == -1) { - crm_perror(LOG_ERR, "Cannot bind to listener socket"); + crm_err("Cannot bind to listener socket: %s", pcmk_rc_str(errno)); close(*ssock); free(ssock); return -2; } if (listen(*ssock, 10) == -1) { - crm_perror(LOG_ERR, "Cannot listen on socket"); + crm_err("Cannot listen on socket: %s", pcmk_rc_str(errno)); close(*ssock); free(ssock); return -3; @@ -222,9 +224,9 @@ cib_remote_auth(xmlNode * login) return FALSE; } - tmp = crm_element_name(login); - if (!pcmk__str_eq(tmp, "cib_command", pcmk__str_casei)) { - crm_err("Wrong tag: %s", tmp); + if (!pcmk__xe_is(login, T_CIB_COMMAND)) { + crm_err("Unrecognizable message from remote client"); + crm_log_xml_info(login, "bad"); return FALSE; } @@ -296,7 +298,7 @@ cib_remote_listen(gpointer data) 
memset(&addr, 0, sizeof(addr)); csock = accept(ssock, (struct sockaddr *)&addr, &laddr); if (csock == -1) { - crm_perror(LOG_ERR, "Could not accept socket connection"); + crm_err("Could not accept socket connection: %s", pcmk_rc_str(errno)); return TRUE; } @@ -411,9 +413,8 @@ cib_handle_remote_msg(pcmk__client_t *client, xmlNode *command) { const char *value = NULL; - value = crm_element_name(command); - if (!pcmk__str_eq(value, "cib_command", pcmk__str_casei)) { - crm_log_xml_trace(command, "Bad command: "); + if (!pcmk__xe_is(command, T_CIB_COMMAND)) { + crm_log_xml_trace(command, "bad"); return; } diff --git a/daemons/based/based_transaction.c b/daemons/based/based_transaction.c new file mode 100644 index 0000000..89aea2e --- /dev/null +++ b/daemons/based/based_transaction.c @@ -0,0 +1,167 @@ +/* + * Copyright 2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> +#include <libxml/tree.h> + +#include "pacemaker-based.h" + +/*! + * \internal + * \brief Create a string describing the source of a commit-transaction request + * + * \param[in] client CIB client + * \param[in] origin Host where the commit request originated + * + * \return String describing the request source + * + * \note The caller is responsible for freeing the return value using \c free(). + */ +char * +based_transaction_source_str(const pcmk__client_t *client, const char *origin) +{ + char *source = NULL; + + if (client != NULL) { + source = crm_strdup_printf("client %s (%s)%s%s", + pcmk__client_name(client), + pcmk__s(client->id, "unidentified"), + ((origin != NULL)? " on " : ""), + pcmk__s(origin, "")); + + } else { + source = strdup((origin != NULL)? origin : "unknown source"); + } + + CRM_ASSERT(source != NULL); + return source; +} + +/*! 
+ * \internal + * \brief Process requests in a transaction + * + * Stop when a request fails or when all requests have been processed. + * + * \param[in,out] transaction Transaction to process + * \param[in] client CIB client + * \param[in] source String describing the commit request source + * + * \return Standard Pacemaker return code + */ +static int +process_transaction_requests(xmlNodePtr transaction, + const pcmk__client_t *client, const char *source) +{ + for (xmlNodePtr request = first_named_child(transaction, T_CIB_COMMAND); + request != NULL; request = crm_next_same_xml(request)) { + + const char *op = crm_element_value(request, F_CIB_OPERATION); + const char *host = crm_element_value(request, F_CIB_HOST); + const cib__operation_t *operation = NULL; + int rc = cib__get_operation(op, &operation); + + if (rc == pcmk_rc_ok) { + if (!pcmk_is_set(operation->flags, cib__op_attr_transaction) + || (host != NULL)) { + + rc = EOPNOTSUPP; + } else { + /* Commit-transaction is a privileged operation. If we reached + * this point, the request came from a privileged connection. + */ + rc = cib_process_request(request, TRUE, client); + rc = pcmk_legacy2rc(rc); + } + } + + if (rc != pcmk_rc_ok) { + crm_err("Aborting CIB transaction for %s due to failed %s request: " + "%s", + source, op, pcmk_rc_str(rc)); + crm_log_xml_info(request, "Failed request"); + return rc; + } + + crm_trace("Applied %s request to transaction working CIB for %s", + op, source); + crm_log_xml_trace(request, "Successful request"); + } + + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Commit a given CIB client's transaction to a working CIB copy + * + * \param[in] transaction Transaction to commit + * \param[in] client CIB client + * \param[in] origin Host where the commit request originated + * \param[in,out] result_cib Where to store result CIB + * + * \return Standard Pacemaker return code + * + * \note This function is expected to be called only by + * \p cib_process_commit_transaction(). 
+ * \note \p result_cib is expected to be a copy of the current CIB as created by + * \p cib_perform_op(). + * \note The caller is responsible for activating and syncing \p result_cib on + * success, and for freeing it on failure. + */ +int +based_commit_transaction(xmlNodePtr transaction, const pcmk__client_t *client, + const char *origin, xmlNodePtr *result_cib) +{ + xmlNodePtr saved_cib = the_cib; + int rc = pcmk_rc_ok; + char *source = NULL; + + CRM_ASSERT(result_cib != NULL); + + CRM_CHECK(pcmk__xe_is(transaction, T_CIB_TRANSACTION), + return pcmk_rc_no_transaction); + + /* *result_cib should be a copy of the_cib (created by cib_perform_op()). If + * not, make a copy now. Change tracking isn't strictly required here + * because: + * * Each request in the transaction will have changes tracked and ACLs + * checked if appropriate. + * * cib_perform_op() will infer changes for the commit request at the end. + */ + CRM_CHECK((*result_cib != NULL) && (*result_cib != the_cib), + *result_cib = copy_xml(the_cib)); + + source = based_transaction_source_str(client, origin); + crm_trace("Committing transaction for %s to working CIB", source); + + // Apply all changes to a working copy of the CIB + the_cib = *result_cib; + + rc = process_transaction_requests(transaction, client, origin); + + crm_trace("Transaction commit %s for %s", + ((rc == pcmk_rc_ok)? "succeeded" : "failed"), source); + + /* Some request types (for example, erase) may have freed the_cib (the + * working copy) and pointed it at a new XML object. In that case, it + * follows that *result_cib (the working copy) was freed. + * + * Point *result_cib at the updated working copy stored in the_cib. 
+ */ + *result_cib = the_cib; + + // Point the_cib back to the unchanged original copy + the_cib = saved_cib; + + free(source); + return rc; +} diff --git a/daemons/based/based_transaction.h b/daemons/based/based_transaction.h new file mode 100644 index 0000000..9935c73 --- /dev/null +++ b/daemons/based/based_transaction.h @@ -0,0 +1,24 @@ +/* + * Copyright 2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef BASED_TRANSACTION__H +#define BASED_TRANSACTION__H + +#include <crm_internal.h> + +#include <libxml/tree.h> + +char *based_transaction_source_str(const pcmk__client_t *client, + const char *origin); + +int based_commit_transaction(xmlNodePtr transaction, + const pcmk__client_t *client, + const char *origin, xmlNodePtr *result_cib); + +#endif // BASED_TRANSACTION__H diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c index 129997e..5dd7938 100644 --- a/daemons/based/pacemaker-based.c +++ b/daemons/based/pacemaker-based.c @@ -16,7 +16,8 @@ #include <bzlib.h> #include <sys/types.h> -#include <libxml/parser.h> +#include <glib.h> +#include <libxml/tree.h> #include <crm/crm.h> #include <crm/cib/internal.h> @@ -42,6 +43,7 @@ gchar *cib_root = NULL; static gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; +gboolean stand_alone = FALSE; int remote_fd = 0; int remote_tls_fd = 0; @@ -49,8 +51,6 @@ int remote_tls_fd = 0; GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; -pcmk__output_t *logger_out = NULL; - static void cib_init(void); void cib_shutdown(int nsig); static bool startCib(const char *filename); @@ -197,15 +197,6 @@ main(int argc, char **argv) goto done; } - rc = pcmk__log_output_new(&logger_out); - if (rc != pcmk_rc_ok) { - exit_code = CRM_EX_ERROR; - g_set_error(&error, 
PCMK__EXITC_ERROR, exit_code, - "Error creating output format log: %s", pcmk_rc_str(rc)); - goto done; - } - pcmk__output_set_log_level(logger_out, LOG_TRACE); - mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); @@ -230,7 +221,7 @@ main(int argc, char **argv) goto done; } - if (crm_ipc_connect(old_instance)) { + if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h index 05e49b3..33c7642 100644 --- a/daemons/based/pacemaker-based.h +++ b/daemons/based/pacemaker-based.h @@ -18,6 +18,9 @@ #include <errno.h> #include <fcntl.h> +#include <glib.h> +#include <libxml/tree.h> + #include <crm/crm.h> #include <crm/cib.h> #include <crm/common/xml.h> @@ -26,16 +29,19 @@ #include <crm/common/mainloop.h> #include <crm/cib/internal.h> +#include "based_transaction.h" + #ifdef HAVE_GNUTLS_GNUTLS_H # include <gnutls/gnutls.h> #endif +#define OUR_NODENAME (stand_alone? 
"localhost" : crm_cluster->uname) + // CIB-specific client flags enum cib_client_flags { // Notifications cib_notify_pre = (UINT64_C(1) << 0), cib_notify_post = (UINT64_C(1) << 1), - cib_notify_replace = (UINT64_C(1) << 2), cib_notify_confirm = (UINT64_C(1) << 3), cib_notify_diff = (UINT64_C(1) << 4), @@ -43,16 +49,6 @@ enum cib_client_flags { cib_is_daemon = (UINT64_C(1) << 12), }; -typedef struct cib_operation_s { - const char *operation; - gboolean modifies_cib; - gboolean needs_privileges; - int (*prepare) (xmlNode *, xmlNode **, const char **); - int (*cleanup) (int, xmlNode **, xmlNode **); - int (*fn) (const char *, int, const char *, xmlNode *, - xmlNode *, xmlNode *, xmlNode **, xmlNode **); -} cib_operation_t; - extern bool based_is_primary; extern GHashTable *config_hash; extern xmlNode *the_cib; @@ -67,7 +63,6 @@ extern gboolean stand_alone; extern gboolean cib_shutdown_flag; extern gchar *cib_root; extern int cib_status; -extern pcmk__output_t *logger_out; extern struct qb_ipcs_service_handlers ipc_ro_callbacks; extern struct qb_ipcs_service_handlers ipc_rw_callbacks; @@ -79,6 +74,8 @@ void cib_peer_callback(xmlNode *msg, void *private_data); void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode *op_request, pcmk__client_t *cib_client, gboolean privileged); +int cib_process_request(xmlNode *request, gboolean privileged, + const pcmk__client_t *cib_client); void cib_shutdown(int nsig); void terminate_cib(const char *caller, int fast); gboolean cib_legacy_mode(void); @@ -92,9 +89,9 @@ int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer); -int cib_process_default(const char *op, int options, const char *section, - xmlNode *req, xmlNode *input, xmlNode *existing_cib, - xmlNode **result_cib, xmlNode **answer); +int cib_process_noop(const char *op, int options, const char *section, + xmlNode *req, xmlNode *input, xmlNode 
*existing_cib, + xmlNode **result_cib, xmlNode **answer); int cib_process_ping(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer); @@ -121,25 +118,17 @@ int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer); +int cib_process_commit_transaction(const char *op, int options, + const char *section, xmlNode *req, + xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); void send_sync_request(const char *host); int sync_our_cib(xmlNode *request, gboolean all); -xmlNode *cib_msg_copy(xmlNode *msg, gboolean with_data); -int cib_get_operation_id(const char *op, int *operation); -cib_op_t *cib_op_func(int call_type); -gboolean cib_op_modifies(int call_type); -int cib_op_prepare(int call_type, xmlNode *request, xmlNode **input, - const char **section); -int cib_op_cleanup(int call_type, int options, xmlNode **input, - xmlNode **output); -int cib_op_can_run(int call_type, int call_options, bool privileged); +cib__op_fn_t based_get_op_function(const cib__operation_t *operation); void cib_diff_notify(const char *op, int result, const char *call_id, const char *client_id, const char *client_name, const char *origin, xmlNode *update, xmlNode *diff); -void cib_replace_notify(const char *op, int result, const char *call_id, - const char *client_id, const char *client_name, - const char *origin, xmlNode *update, xmlNode *diff, - uint32_t change_section); static inline const char * cib_config_lookup(const char *opt) diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am index 08be1ff..1312090 100644 --- a/daemons/controld/Makefile.am +++ b/daemons/controld/Makefile.am @@ -14,34 +14,20 @@ halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = pacemaker-controld -noinst_HEADERS = controld_alerts.h \ - controld_callbacks.h \ - controld_cib.h \ 
- controld_fencing.h \ - controld_fsa.h \ - controld_globals.h \ - controld_lrm.h \ - controld_membership.h \ - controld_messages.h \ - controld_metadata.h \ - controld_throttle.h \ - controld_timers.h \ - controld_transition.h \ - controld_utils.h \ - pacemaker-controld.h +noinst_HEADERS = $(wildcard *.h) pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \ - $(top_builddir)/lib/pacemaker/libpacemaker.la \ - $(top_builddir)/lib/pengine/libpe_rules.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ - $(CLUSTERLIBS) +pacemaker_controld_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la +pacemaker_controld_LDADD += $(top_builddir)/lib/cib/libcib.la +pacemaker_controld_LDADD += $(top_builddir)/lib/pengine/libpe_rules.la +pacemaker_controld_LDADD += $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_controld_LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la +pacemaker_controld_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la +pacemaker_controld_LDADD += $(top_builddir)/lib/services/libcrmservice.la +pacemaker_controld_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_controld_LDADD += $(CLUSTERLIBS) pacemaker_controld_SOURCES = pacemaker-controld.c \ controld_alerts.c \ @@ -79,9 +65,11 @@ endif CLEANFILES = $(man7_MANS) if BUILD_LEGACY_LINKS +.PHONY: install-exec-hook install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd && $(LN_S) pacemaker-controld crmd +.PHONY: uninstall-hook uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f crmd endif diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index d578adc..7078739 100644 --- a/daemons/controld/controld_callbacks.c +++ 
b/daemons/controld/controld_callbacks.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -107,6 +107,8 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d bool appeared = FALSE; bool is_remote = pcmk_is_set(node->flags, crm_remote_node); + controld_node_pending_timer(node); + /* The controller waits to receive some information from the membership * layer before declaring itself operational. If this is being called for a * cluster node, indicate that we have it. @@ -274,13 +276,14 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); - if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s fencer reported fencing of %s complete", (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname); - } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { + } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, + pcmk__str_casei)) { // Shutdown actions are immediately confirmed (i.e. no_wait) if (!is_remote) { @@ -342,6 +345,17 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d } } + if (!appeared && (type == crm_status_processes) + && (node->when_member > 1)) { + /* The node left CPG but is still a cluster member. Set its + * membership time to 1 to record it in the cluster state as a + * boolean, so we don't fence it due to node-pending-timeout. 
+ */ + node->when_member = 1; + flags |= node_update_cluster; + controld_node_pending_timer(node); + } + /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __func__); if (update == NULL) { diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index 94b99dd..865e41f 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -22,90 +22,6 @@ // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; -// Call IDs of requested CIB replacements that won't trigger a new election -// (used as a set of gint values) -static GHashTable *cib_replacements = NULL; - -/*! - * \internal - * \brief Store the call ID of a CIB replacement that the controller requested - * - * The \p do_cib_replaced() callback function will avoid triggering a new - * election when we're notified of one of these expected replacements. - * - * \param[in] call_id CIB call ID (or 0 for a synchronous call) - * - * \note This function should be called after making any asynchronous CIB - * request (or before making any synchronous CIB request) that may replace - * part of the nodes or status section. This may include CIB sync calls. - */ -void -controld_record_cib_replace_call(int call_id) -{ - CRM_CHECK(call_id >= 0, return); - - if (cib_replacements == NULL) { - cib_replacements = g_hash_table_new(NULL, NULL); - } - - /* If the call ID is already present in the table, then it's old. We may not - * be removing them properly, and we could improperly ignore replacement - * notifications if cib_t:call_id wraps around. - */ - CRM_LOG_ASSERT(g_hash_table_add(cib_replacements, - GINT_TO_POINTER((gint) call_id))); -} - -/*! 
- * \internal - * \brief Remove the call ID of a CIB replacement from the replacements table - * - * \param[in] call_id CIB call ID (or 0 for a synchronous call) - * - * \return \p true if \p call_id was found in the table, or \p false otherwise - * - * \note CIB notifications run before CIB callbacks. If this function is called - * from within a callback, \p do_cib_replaced() will have removed - * \p call_id from the table first if relevant changes triggered a - * notification. - */ -bool -controld_forget_cib_replace_call(int call_id) -{ - CRM_CHECK(call_id >= 0, return false); - - if (cib_replacements == NULL) { - return false; - } - return g_hash_table_remove(cib_replacements, - GINT_TO_POINTER((gint) call_id)); -} - -/*! - * \internal - * \brief Empty the hash table containing call IDs of CIB replacement requests - */ -void -controld_forget_all_cib_replace_calls(void) -{ - if (cib_replacements != NULL) { - g_hash_table_remove_all(cib_replacements); - } -} - -/*! - * \internal - * \brief Free the hash table containing call IDs of CIB replacement requests - */ -void -controld_destroy_cib_replacements_table(void) -{ - if (cib_replacements != NULL) { - g_hash_table_destroy(cib_replacements); - cib_replacements = NULL; - } -} - /*! 
* \internal * \brief Respond to a dropped CIB connection @@ -127,54 +43,54 @@ handle_cib_disconnect(gpointer user_data) controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected - crm_info("Connection to the CIB manager terminated"); + crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { - if (pcmk__alert_in_patchset(msg, TRUE)) { - controld_trigger_config(); + const xmlNode *patchset = NULL; + const char *client_name = NULL; + + crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); + + if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { + return; } -} -static void -do_cib_replaced(const char *event, xmlNode * msg) -{ - int call_id = 0; - const char *client_id = crm_element_value(msg, F_CIB_CLIENTID); - uint32_t change_section = cib_change_section_nodes - |cib_change_section_status; - long long value = 0; + if (cib__element_in_patchset(patchset, XML_CIB_TAG_ALERTS) + || cib__element_in_patchset(patchset, XML_CIB_TAG_CRMCONFIG)) { + + controld_trigger_config(); + } - crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC)); if (!AM_I_DC) { + // We're not in control of the join sequence return; } - if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0) - && pcmk__str_eq(client_id, controld_globals.cib_client_id, - pcmk__str_none) - && controld_forget_cib_replace_call(call_id)) { - // We requested this replace op. No need to restart the join. 
+ client_name = crm_element_value(msg, F_CIB_CLIENTNAME); + if (!cib__client_triggers_refresh(client_name)) { + // The CIB is still accurate return; } - if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0) - || (value < 0) || (value > UINT32_MAX)) { + if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) + || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { - crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION); - } else { - change_section = (uint32_t) value; - } - - if (pcmk_any_flags_set(change_section, cib_change_section_nodes - |cib_change_section_status)) { + /* An unsafe client modified the nodes or status section. Ensure the + * node list is up-to-date, and start the join process again so we get + * everyone's current resource history. + */ + if (client_name == NULL) { + client_name = crm_element_value(msg, F_CIB_CLIENTID); + } + crm_notice("Populating nodes and starting an election after %s event " + "triggered by %s", + event, pcmk__s(client_name, "(unidentified client)")); - /* start the join process again so we get everyone's LRM status */ populate_cib_nodes(node_update_quick|node_update_all, __func__); - register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } @@ -186,12 +102,10 @@ controld_disconnect_cib_manager(void) CRM_ASSERT(cib_conn != NULL); - crm_info("Disconnecting from the CIB manager"); + crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); - cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, - do_cib_replaced); cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); @@ -201,8 +115,6 @@ controld_disconnect_cib_manager(void) cib_scope_local|cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } - - crm_notice("Disconnected from the CIB manager"); } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ @@ -217,7 +129,6 @@ do_cib_control(long long action, cib_t *cib_conn = 
controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; - void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; @@ -264,11 +175,6 @@ do_cib_control(long long action, crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, - T_CIB_REPLACE_NOTIFY, - replace_cb) != pcmk_ok) { - crm_err("Could not set CIB notification callback (replace)"); - - } else if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); @@ -276,8 +182,6 @@ do_cib_control(long long action, } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; - cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id, - NULL); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { @@ -310,11 +214,12 @@ do_cib_control(long long action, unsigned int cib_op_timeout(void) { + // @COMPAT: Drop env_timeout at 3.0.0 static int env_timeout = -1; unsigned int calculated_timeout = 0; if (env_timeout == -1) { - const char *env = getenv("PCMK_cib_timeout"); + const char *env = pcmk__env_option(PCMK__ENV_CIB_TIMEOUT); pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Minimum CIB op timeout: %ds (environment: %s)", @@ -401,67 +306,87 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, /*! 
* \internal - * \brief Delete subsection of a node's CIB node_state + * \brief Get the XPath and description of a node state section to be deleted * - * \param[in] uname Desired node - * \param[in] section Subsection of node_state to delete - * \param[in] options CIB call options to use + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to be deleted + * \param[out] xpath Where to store XPath of \p section + * \param[out] desc If not \c NULL, where to store description of \p section */ void -controld_delete_node_state(const char *uname, enum controld_section_e section, - int options) +controld_node_state_deletion_strings(const char *uname, + enum controld_section_e section, + char **xpath, char **desc) { - cib_t *cib_conn = controld_globals.cib_conn; - - char *xpath = NULL; - char *desc = NULL; + const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; - CRM_CHECK(uname != NULL, return); switch (section) { case controld_section_lrm: - xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); - desc = crm_strdup_printf("resource history for node %s", uname); + *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc_pre = "resource history"; break; case controld_section_lrm_unlocked: - xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, - uname, uname, expire); - desc = crm_strdup_printf("resource history (other than shutdown " - "locks) for node %s", uname); + *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, + uname, uname, expire); + desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: - xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); - desc = crm_strdup_printf("transient attributes for node %s", uname); + *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc_pre = "transient attributes"; break; case controld_section_all: - xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); - 
desc = crm_strdup_printf("all state for node %s", uname); + *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc_pre = "all state"; break; case controld_section_all_unlocked: - xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, - uname, uname, expire, uname); - desc = crm_strdup_printf("all state (other than shutdown locks) " - "for node %s", uname); + *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, + uname, uname, expire, uname); + desc_pre = "all state (other than shutdown locks)"; + break; + default: + // We called this function incorrectly + CRM_ASSERT(false); break; } - if (cib_conn == NULL) { - crm_warn("Unable to delete %s: no CIB connection", desc); - free(desc); - } else { - int call_id; - - cib__set_call_options(options, "node state deletion", - cib_xpath|cib_multiple); - call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options); - crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", - desc, call_id, xpath); - fsa_register_cib_callback(call_id, desc, cib_delete_callback); - // CIB library handles freeing desc + if (desc != NULL) { + *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } +} + +/*! 
+ * \internal + * \brief Delete subsection of a node's CIB node_state + * + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to delete + * \param[in] options CIB call options to use + */ +void +controld_delete_node_state(const char *uname, enum controld_section_e section, + int options) +{ + cib_t *cib = controld_globals.cib_conn; + char *xpath = NULL; + char *desc = NULL; + int cib_rc = pcmk_ok; + + CRM_ASSERT((uname != NULL) && (cib != NULL)); + + controld_node_state_deletion_strings(uname, section, &xpath, &desc); + + cib__set_call_options(options, "node state deletion", + cib_xpath|cib_multiple); + cib_rc = cib->cmds->remove(cib, xpath, NULL, options); + fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, cib_rc, xpath); + + // CIB library handles freeing desc free(xpath); } @@ -491,11 +416,12 @@ controld_delete_resource_history(const char *rsc_id, const char *node, char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; + cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); - if (controld_globals.cib_conn == NULL) { + if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; @@ -503,9 +429,10 @@ controld_delete_resource_history(const char *rsc_id, const char *node, // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); - rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE, - NULL, xpath, NULL, NULL, call_options|cib_xpath, - user_name); + + cib->cmds->set_user(cib, user_name); + rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); + cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); @@ -841,10 +768,17 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use 
case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: - crm_trace("Resource update %d complete: rc=%d", call_id, rc); + crm_trace("Resource history update completed (call=%d rc=%d)", + call_id, rc); break; default: - crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); + if (call_id > 0) { + crm_warn("Resource history update %d failed: %s " + CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); + } else { + crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + } } if (call_id == pending_rsc_update) { @@ -863,10 +797,11 @@ should_preserve_lock(lrmd_event_data_t *op) if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } - if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { + if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } - if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { + if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) + && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; @@ -876,10 +811,10 @@ should_preserve_lock(lrmd_event_data_t *op) * \internal * \brief Request a CIB update * - * \param[in] section Section of CIB to update - * \param[in,out] data New XML of CIB section to update - * \param[in] options CIB call options - * \param[in] callback If not NULL, set this as the operation callback + * \param[in] section Section of CIB to update + * \param[in] data New XML of CIB section to update + * \param[in] options CIB call options + * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * @@ -890,14 +825,13 @@ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { + cib_t *cib = controld_globals.cib_conn; int cib_rc = -ENOTCONN; CRM_ASSERT(data != NULL); - if (controld_globals.cib_conn != NULL) { - cib_rc = 
cib_internal_op(controld_globals.cib_conn, - PCMK__CIB_REQUEST_MODIFY, NULL, section, - data, NULL, options, NULL); + if (cib != NULL) { + cib_rc = cib->cmds->modify(cib, section, data, options); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); @@ -1047,7 +981,6 @@ controld_delete_action_history(const lrmd_event_data_t *op) controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_none); - crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } @@ -1087,7 +1020,6 @@ controld_cib_delete_last_failure(const char *rsc_id, const char *node, { char *xpath = NULL; char *last_failure_key = NULL; - CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h index bd9492a..dcc5a48 100644 --- a/daemons/controld/controld_cib.h +++ b/daemons/controld/controld_cib.h @@ -43,11 +43,6 @@ fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) { } } -void controld_record_cib_replace_call(int call_id); -bool controld_forget_cib_replace_call(int call_id); -void controld_forget_all_cib_replace_calls(void); -void controld_destroy_cib_replacements_table(void); - int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)); @@ -62,6 +57,9 @@ enum controld_section_e { controld_section_all_unlocked }; +void controld_node_state_deletion_strings(const char *uname, + enum controld_section_e section, + char **xpath, char **desc); void controld_delete_node_state(const char *uname, enum controld_section_e section, int options); int controld_delete_resource_history(const char *rsc_id, const char *node, @@ -118,8 +116,8 @@ int crmd_cib_smart_opt(void); static inline bool controld_action_is_recordable(const char *action) { - return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE, - CRMD_ACTION_NOTIFY, 
CRMD_ACTION_METADATA, NULL); + return !pcmk__str_any_of(action, PCMK_ACTION_CANCEL, PCMK_ACTION_DELETE, + PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); } #endif // PCMK__CONTROLD_CIB__H diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index ffc62a0..644d686 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -221,6 +221,7 @@ crmd_exit(crm_exit_t exit_code) g_list_free(controld_globals.fsa_message_queue); controld_globals.fsa_message_queue = NULL; + controld_free_node_pending_timers(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could @@ -265,7 +266,6 @@ crmd_exit(crm_exit_t exit_code) controld_globals.te_uuid = NULL; free_max_generation(); - controld_destroy_cib_replacements_table(); controld_destroy_failed_sync_table(); controld_destroy_outside_events_table(); @@ -323,20 +323,12 @@ do_exit(long long action, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; - int log_level = LOG_INFO; - const char *exit_type = "gracefully"; - if (action & A_EXIT_1) { - log_level = LOG_ERR; - exit_type = "forcefully"; + if (pcmk_is_set(action, A_EXIT_1)) { exit_code = CRM_EX_ERROR; + crm_err("Exiting now due to errors"); } - verify_stopped(cur_state, LOG_ERR); - do_crm_log(log_level, "Performing %s - %s exiting the controller", - fsa_action2string(action), exit_type); - - crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } @@ -504,7 +496,8 @@ do_started(long long action, } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } - controld_trigger_fencer_connect(); + controld_set_fsa_input_flags(R_ST_REQUIRED); + controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); @@ -684,6 +677,17 @@ static 
pcmk__cluster_option_t controller_options[] = { "passed since the shutdown was initiated, even if the node has not " "rejoined.") }, + { + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, + N_("How long to wait for a node that has joined the cluster to join " + "the controller process group"), + N_("Fence nodes that do not join the controller process group within " + "this much time after joining the cluster, to allow the cluster " + "to continue managing resources. A value of 0 means never fence " + "pending nodes. Setting the value to 2h means fence nodes after " + "2 hours.") + }, }; void @@ -722,9 +726,8 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void } crmconfig = output; - if ((crmconfig) && - (crm_element_name(crmconfig)) && - (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { + if ((crmconfig != NULL) + && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { @@ -761,6 +764,10 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value) / 1000; + value = g_hash_table_lookup(config_hash, + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); + controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000; + value = g_hash_table_lookup(config_hash, "cluster-name"); pcmk__str_update(&(controld_globals.cluster_name), value); diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c index 4378b30..b69e821 100644 --- a/daemons/controld/controld_corosync.c +++ b/daemons/controld/controld_corosync.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -81,9 +81,6 @@ crmd_cs_destroy(gpointer user_data) if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { crm_crit("Lost connection to cluster layer, shutting down"); crmd_exit(CRM_EX_DISCONNECT); - - } else { - crm_info("Corosync connection closed"); } } @@ -122,7 +119,8 @@ cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, if (controld_globals.dc_name != NULL) { crm_node_t *peer = NULL; - peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name); + peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name, + NULL); if (peer != NULL) { for (int i = 0; i < left_list_entries; ++i) { if (left_list[i].nodeid == peer->id) { diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c index 5f33d5b..70ffecc 100644 --- a/daemons/controld/controld_election.c +++ b/daemons/controld/controld_election.c @@ -263,13 +263,6 @@ do_dc_release(long long action, } else if (action & A_DC_RELEASED) { crm_info("DC role released"); -#if 0 - if (are there errors) { - /* we can't stay up if not healthy */ - /* or perhaps I_ERROR and go to S_RECOVER? 
*/ - result = I_SHUTDOWN; - } -#endif if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { xmlNode *update = NULL; crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 0de399c..480d37d 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -52,14 +52,10 @@ static void lrm_connection_destroy(void) { if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { - crm_crit("Connection to executor failed"); + crm_crit("Lost connection to local executor"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); - - } else { - crm_info("Disconnected from executor"); } - } static char * @@ -171,7 +167,7 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ return; } - if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) { + if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } @@ -222,10 +218,10 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ } entry->last = lrmd_copy_event(op); - if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START, - CRMD_ACTION_RELOAD, - CRMD_ACTION_RELOAD_AGENT, - CRMD_ACTION_STATUS, NULL)) { + if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, + PCMK_ACTION_RELOAD, + PCMK_ACTION_RELOAD_AGENT, + PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } @@ -243,7 +239,9 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); - } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) { + } else if ((entry->recurring_op_list != NULL) + && !pcmk__str_eq(op->op_type, 
PCMK_ACTION_MONITOR, + pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); @@ -376,10 +374,8 @@ do_lrm_control(long long action, } controld_clear_fsa_input_flags(R_LRM_CONNECTED); - crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); - crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { @@ -510,11 +506,14 @@ is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); - if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) { + if ((entry->last->rc == PCMK_OCF_OK) + && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, + pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK - && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { + && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, + pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; @@ -668,7 +667,7 @@ notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_ crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? 
"" : " not")); - op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); + op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); @@ -1117,7 +1116,8 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, op = construct_op(lrm_state, action, ID(xml_rsc), operation); - if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail + if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { + // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); @@ -1329,7 +1329,7 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; - op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); + op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. 
@@ -1394,7 +1394,9 @@ metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } - do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); + if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { + do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); + } free_metadata_cb_data(data); } @@ -1438,11 +1440,11 @@ do_lrm_invoke(long long action, from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } - if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } - operation = CRMD_ACTION_DELETE; + operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); @@ -1486,7 +1488,7 @@ do_lrm_invoke(long long action, } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); - gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE, + gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; @@ -1534,12 +1536,13 @@ do_lrm_invoke(long long action, return; } - if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) { + if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } - } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) { + } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, + pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); @@ -1554,7 +1557,7 @@ do_lrm_invoke(long long action, * changed (using something like inotify, or a hash or modification * time of 
the agent executable). */ - if (strcmp(operation, CRMD_ACTION_START) != 0) { + if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } @@ -1619,7 +1622,8 @@ construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { - CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei)); + CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, + pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting @@ -1654,7 +1658,7 @@ construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) - && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei) + && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); @@ -1663,7 +1667,7 @@ construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, } } - if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) { + if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { @@ -1703,7 +1707,8 @@ construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, op->user_data = strdup(transition); if (op->interval_ms != 0) { - if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) { + if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, + NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; @@ -1849,7 +1854,7 @@ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) - && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) 
{ + && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. @@ -1859,8 +1864,8 @@ should_cancel_recurring(const char *rsc_id, const char *action, guint interval_m // Cancel recurring actions before changing resource state return (interval_ms == 0) - && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY, - NULL); + && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, + PCMK_ACTION_NOTIFY, NULL); } /*! @@ -1876,7 +1881,7 @@ static const char * should_nack_action(const char *action) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) - && pcmk__str_eq(action, RSC_START, pcmk__str_none)) { + && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; @@ -1888,7 +1893,7 @@ should_nack_action(const char *action) case S_TRANSITION_ENGINE: break; default: - if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) { + if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; @@ -1930,8 +1935,8 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, return; } - if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, - CRMD_ACTION_RELOAD_AGENT, NULL)) { + if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, + PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. 
@@ -1939,9 +1944,9 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, */ if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { - operation = CRMD_ACTION_RELOAD; + operation = PCMK_ACTION_RELOAD; } else { - operation = CRMD_ACTION_RELOAD_AGENT; + operation = PCMK_ACTION_RELOAD_AGENT; } } @@ -1968,8 +1973,9 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, - crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, - pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); + pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, + lrm_state->node_name, pcmk__s(transition, ""), rsc->id, + operation, op->interval_ms); nack_reason = should_nack_action(operation); if (nack_reason != NULL) { @@ -2131,7 +2137,8 @@ log_executor_event(const lrmd_event_data_t *op, const char *op_key, GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, - "Result of ", crm_action_str(op->op_type, op->interval_ms), + "Result of ", + pcmk__readable_action(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { @@ -2401,7 +2408,8 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, log_executor_event(op, op_key, node_name, removed); if (lrm_state) { - if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) { + if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, + pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index 8c68bfc..b90cc5e 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -132,12 +132,6 @@ 
lrm_state_create(const char *node_name) return state; } -void -lrm_state_destroy(const char *node_name) -{ - g_hash_table_remove(lrm_state_table, node_name); -} - static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { @@ -307,7 +301,7 @@ lrm_state_destroy_all(void) lrm_state_t * lrm_state_find(const char *node_name) { - if (!node_name) { + if ((node_name == NULL) || (lrm_state_table == NULL)) { return NULL; } return g_hash_table_lookup(lrm_state_table, node_name); @@ -318,6 +312,8 @@ lrm_state_find_or_create(const char *node_name) { lrm_state_t *lrm_state; + CRM_CHECK(lrm_state_table != NULL, return NULL); + lrm_state = g_hash_table_lookup(lrm_state_table, node_name); if (!lrm_state) { lrm_state = lrm_state_create(node_name); @@ -329,6 +325,9 @@ lrm_state_find_or_create(const char *node_name) GList * lrm_state_get_list(void) { + if (lrm_state_table == NULL) { + return NULL; + } return g_hash_table_get_values(lrm_state_table); } @@ -799,7 +798,7 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state, } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { - lrm_state_destroy(rsc_id); + g_hash_table_remove(lrm_state_table, rsc_id); return pcmk_ok; } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 89cb61f..9557d9e 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -218,8 +218,11 @@ send_stonith_update(pcmk__graph_action_t *action, const char *target, CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); - /* Make sure the membership and join caches are accurate */ - peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); + /* Make sure the membership and join caches are accurate. + * Try getting any existing node cache entry also by node uuid in case it + * doesn't have an uname yet. 
+ */ + peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); @@ -391,7 +394,7 @@ execute_stonith_cleanup(void) */ static stonith_t *stonith_api = NULL; -static crm_trigger_t *stonith_reconnect = NULL; +static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static gboolean @@ -422,7 +425,7 @@ fail_incompletable_stonith(pcmk__graph_t *graph) } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); @@ -447,11 +450,12 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { - crm_crit("Fencing daemon connection failed"); - mainloop_set_trigger(stonith_reconnect); - + crm_err("Lost fencer connection (will attempt to reconnect)"); + if (!mainloop_timer_running(controld_fencer_connect_timer)) { + mainloop_timer_start(controld_fencer_connect_timer); + } } else { - crm_info("Fencing daemon disconnected"); + crm_info("Disconnected from fencer"); } if (stonith_api) { @@ -515,7 +519,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) crmd_alert_fencing_op(event); - if (pcmk__str_eq("on", event->action, pcmk__str_none)) { + if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", @@ -647,14 +651,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) /*! 
* \brief Connect to fencer * - * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop + * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * - * \return TRUE + * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ -static gboolean -te_connect_stonith(gpointer user_data) +gboolean +controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; @@ -662,13 +666,13 @@ te_connect_stonith(gpointer user_data) stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); - return TRUE; + return G_SOURCE_REMOVE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); - return TRUE; + return G_SOURCE_REMOVE; } if (user_data == NULL) { @@ -681,17 +685,30 @@ te_connect_stonith(gpointer user_data) } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); + + if (controld_fencer_connect_timer == NULL) { + controld_fencer_connect_timer = + mainloop_timer_add("controld_fencer_connect", 1000, + TRUE, controld_timer_fencer_connect, + GINT_TO_POINTER(TRUE)); + } + if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); - mainloop_set_trigger(stonith_reconnect); + + if (!mainloop_timer_running(controld_fencer_connect_timer)) { + mainloop_timer_start(controld_fencer_connect_timer); + } + + return G_SOURCE_CONTINUE; } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } - return TRUE; + return G_SOURCE_REMOVE; } } @@ -709,23 +726,7 @@ te_connect_stonith(gpointer 
user_data) crm_notice("Fencer successfully connected"); } - return TRUE; -} - -/*! - \internal - \brief Schedule fencer connection attempt in main loop -*/ -void -controld_trigger_fencer_connect(void) -{ - if (stonith_reconnect == NULL) { - stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, - te_connect_stonith, - GINT_TO_POINTER(TRUE)); - } - controld_set_fsa_input_flags(R_ST_REQUIRED); - mainloop_set_trigger(stonith_reconnect); + return G_SOURCE_REMOVE; } void @@ -745,9 +746,9 @@ controld_disconnect_fencer(bool destroy) stonith_api->cmds->free(stonith_api); stonith_api = NULL; } - if (stonith_reconnect) { - mainloop_destroy_trigger(stonith_reconnect); - stonith_reconnect = NULL; + if (controld_fencer_connect_timer) { + mainloop_timer_del(controld_fencer_connect_timer); + controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); @@ -843,7 +844,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); - if (pcmk__str_eq("on", op, pcmk__str_casei)) { + if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; @@ -981,7 +982,7 @@ controld_execute_fence_action(pcmk__graph_t *graph, priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ - te_connect_stonith(NULL); + controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); @@ -1000,12 +1001,14 @@ controld_execute_fence_action(pcmk__graph_t *graph, bool controld_verify_stonith_watchdog_timeout(const char *value) { + long st_timeout = value? 
crm_get_msec(value) : 0; const char *our_nodename = controld_globals.our_nodename; gboolean rv = TRUE; - if (stonith_api && (stonith_api->state != stonith_disconnected) && - stonith__watchdog_fencing_enabled_for_node_api(stonith_api, - our_nodename)) { + if (st_timeout == 0 + || (stonith_api && (stonith_api->state != stonith_disconnected) && + stonith__watchdog_fencing_enabled_for_node_api(stonith_api, + our_nodename))) { rv = pcmk__valid_sbd_timeout(value); } return rv; diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h index 86a5050..76779c6 100644 --- a/daemons/controld/controld_fencing.h +++ b/daemons/controld/controld_fencing.h @@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options); void st_fail_count_reset(const char * target); // stonith API client -void controld_trigger_fencer_connect(void); +gboolean controld_timer_fencer_connect(gpointer user_data); void controld_disconnect_fencer(bool destroy); int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action); diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c index 622d1c8..06559b8 100644 --- a/daemons/controld/controld_fsa.c +++ b/daemons/controld/controld_fsa.c @@ -205,7 +205,6 @@ s_crmd_fsa(enum crmd_fsa_cause cause) fsa_data->data_type = fsa_dt_none; controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); - fsa_data = NULL; } while ((controld_globals.fsa_message_queue != NULL) && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { @@ -275,7 +274,6 @@ s_crmd_fsa(enum crmd_fsa_cause cause) /* start doing things... 
*/ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); - fsa_data = NULL; } if ((controld_globals.fsa_message_queue != NULL) @@ -620,11 +618,6 @@ do_state_transition(enum crmd_fsa_state cur_state, if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { controld_stop_current_election_timeout(); } -#if 0 - if ((controld_globals.fsa_input_register & R_SHUTDOWN)) { - controld_set_fsa_action_flags(A_DC_TIMER_STOP); - } -#endif if (next_state == S_INTEGRATION) { controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START); } else { diff --git a/daemons/controld/controld_globals.h b/daemons/controld/controld_globals.h index eff1607..2ff8a57 100644 --- a/daemons/controld/controld_globals.h +++ b/daemons/controld/controld_globals.h @@ -45,9 +45,6 @@ typedef struct { //! Connection to the CIB cib_t *cib_conn; - //! CIB connection's client ID - const char *cib_client_id; - // Scheduler @@ -93,6 +90,9 @@ typedef struct { //! Max lifetime (in seconds) of a resource's shutdown lock to a node guint shutdown_lock_limit; + //! Node pending timeout + guint node_pending_timeout; + //! Main event loop GMainLoop *mainloop; } controld_globals_t; diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c index da6a9d6..805ecbd 100644 --- a/daemons/controld/controld_join_client.c +++ b/daemons/controld/controld_join_client.c @@ -112,15 +112,6 @@ do_cl_join_offer_respond(long long action, CRM_CHECK(input != NULL, return); -#if 0 - if (we are sick) { - log error; - - /* save the request for later? 
*/ - return; - } -#endif - welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", @@ -195,32 +186,34 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * free_xml(generation); } -static void -set_join_state(const char * start_state) +void +set_join_state(const char *start_state, const char *node_name, const char *node_uuid, + bool remote) { if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " - "environment", controld_globals.our_nodename, start_state); + "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, - XML_CIB_TAG_NODES, controld_globals.our_uuid, - NULL, NULL, NULL, "standby", "on", NULL, NULL); + XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", "on", NULL, + remote ? "remote" : NULL); } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " - "environment", controld_globals.our_nodename, start_state); + "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, - XML_CIB_TAG_NODES, controld_globals.our_uuid, - NULL, NULL, NULL, "standby", "off", NULL, NULL); + XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", "off", NULL, + remote ? 
"remote" : NULL); } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { - crm_debug("Not forcing a starting state on node %s", - controld_globals.our_nodename); + crm_debug("Not forcing a starting state on node %s", node_name); } else { crm_warn("Unrecognized start state '%s', using 'default' (%s)", - start_state, controld_globals.our_nodename); + start_state, node_name); } } @@ -335,7 +328,8 @@ do_cl_join_finalize_respond(long long action, first_join = FALSE; if (start_state) { - set_join_state(start_state); + set_join_state(start_state, controld_globals.our_nodename, + controld_globals.our_uuid, false); } } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index f82b132..2fe6710 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -172,7 +172,6 @@ start_join_round(void) max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB); - controld_forget_all_cib_replace_calls(); } /*! @@ -607,10 +606,6 @@ do_dc_join_finalize(long long action, rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, sync_from, NULL, cib_none); - - if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { - controld_record_cib_replace_call(rc); - } fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); } @@ -629,8 +624,6 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi { CRM_LOG_ASSERT(-EPERM != rc); - controld_forget_cib_replace_call(call_id); - if (rc != pcmk_ok) { const char *sync_from = (const char *) user_data; @@ -674,22 +667,25 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi } static void -join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +join_node_state_commit_callback(xmlNode *msg, int call_id, int rc, + xmlNode *output, void *user_data) { - fsa_data_t *msg_data = NULL; + const char *node = user_data; - if (rc == 
pcmk_ok) { - crm_debug("join-%d node history update (via CIB call %d) complete", - current_join_id, call_id); - check_join_state(controld_globals.fsa_state, __func__); + if (rc != pcmk_ok) { + fsa_data_t *msg_data = NULL; // for register_fsa_error() macro - } else { - crm_err("join-%d node history update (via CIB call %d) failed: %s " - "(next transition may determine resource status incorrectly)", - current_join_id, call_id, pcmk_strerror(rc)); + crm_crit("join-%d node history update (via CIB call %d) for node %s " + "failed: %s", + current_join_id, call_id, node, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } + + crm_debug("join-%d node history update (via CIB call %d) for node %s " + "complete", + current_join_id, call_id, node); + check_join_state(controld_globals.fsa_state, __func__); } /* A_DC_JOIN_PROCESS_ACK */ @@ -701,33 +697,39 @@ do_dc_join_ack(long long action, { int join_id = -1; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); - enum controld_section_e section = controld_section_lrm; - const int cib_opts = cib_scope_local|cib_can_create; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); - const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + char *join_from = crm_element_value_copy(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = NULL; + enum controld_section_e section = controld_section_lrm; + char *xpath = NULL; + xmlNode *state = join_ack->xml; + xmlNode *execd_state = NULL; + + cib_t *cib = controld_globals.cib_conn; + int rc = pcmk_ok; + // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); - return; + goto done; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); - return; + goto done; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); - return; + goto 
done; } if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); - return; + goto done; } peer = crm_get_peer(0, join_from); @@ -736,7 +738,7 @@ do_dc_join_ack(long long action, "(currently %s not %s)", join_id, join_from, crm_join_phase_str(peer->join), crm_join_phase_str(crm_join_finalized)); - return; + goto done; } if (join_id != current_join_id) { @@ -744,40 +746,85 @@ do_dc_join_ack(long long action, "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, crm_join_nack); - return; + goto done; } crm_update_peer_join(__func__, peer, crm_join_confirmed); /* Update CIB with node's current executor state. A new transition will be - * triggered later, when the CIB notifies us of the change. + * triggered later, when the CIB manager notifies us of the change. + * + * The delete and modify requests are part of an atomic transaction. */ + rc = cib->cmds->init_transaction(cib); + if (rc != pcmk_ok) { + goto done; + } + + // Delete relevant parts of node's current executor state from CIB if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { section = controld_section_lrm_unlocked; } - controld_delete_node_state(join_from, section, cib_scope_local); + controld_node_state_deletion_strings(join_from, section, &xpath, NULL); + + rc = cib->cmds->remove(cib, xpath, NULL, + cib_scope_local + |cib_xpath + |cib_multiple + |cib_transaction); + if (rc != pcmk_ok) { + goto done; + } + + // Update CIB with node's latest known executor state if (pcmk__str_eq(join_from, controld_globals.our_nodename, pcmk__str_casei)) { - xmlNode *now_dc_lrmd_state = controld_query_executor_state(); - - if (now_dc_lrmd_state != NULL) { - crm_debug("Updating local node history for join-%d " - "from query result", join_id); - controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts, - join_update_complete_callback); - 
free_xml(now_dc_lrmd_state); + + // Use the latest possible state if processing our own join ack + execd_state = controld_query_executor_state(); + + if (execd_state != NULL) { + crm_debug("Updating local node history for join-%d from query " + "result", + current_join_id); + state = execd_state; + } else { crm_warn("Updating local node history from join-%d confirmation " - "because query failed", join_id); - controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, - join_update_complete_callback); + "because query failed", + current_join_id); } + } else { crm_debug("Updating node history for %s from join-%d confirmation", - join_from, join_id); - controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts, - join_update_complete_callback); + join_from, current_join_id); + } + + rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, state, + cib_scope_local|cib_can_create|cib_transaction); + free_xml(execd_state); + if (rc != pcmk_ok) { + goto done; + } + + // Commit the transaction + rc = cib->cmds->end_transaction(cib, true, cib_scope_local); + fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback); + + if (rc > 0) { + // join_from will be freed after callback + join_from = NULL; + rc = pcmk_ok; + } + +done: + if (rc != pcmk_ok) { + crm_crit("join-%d node history update for node %s failed: %s", + current_join_id, join_from, pcmk_strerror(rc)); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } + free(join_from); + free(xpath); } void @@ -808,7 +855,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); - set_uuid(tmp1, XML_ATTR_ID, join_node); + crm_xml_add(tmp1, XML_ATTR_ID, crm_peer_uuid(join_node)); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h index 
25f3db3..c3113e4 100644 --- a/daemons/controld/controld_lrm.h +++ b/daemons/controld/controld_lrm.h @@ -109,11 +109,6 @@ gboolean lrm_state_init_local(void); void lrm_state_destroy_all(void); /*! - * \brief Destroy executor connection by node name - */ -void lrm_state_destroy(const char *node_name); - -/*! * \brief Find lrm_state data by node name */ lrm_state_t *lrm_state_find(const char *node_name); diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index 1f7e4c0..f25d1e9 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -138,10 +138,8 @@ create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true); } - set_uuid(node_state, XML_ATTR_ID, node); - - if (crm_element_value(node_state, XML_ATTR_ID) == NULL) { - crm_info("Node update for %s cancelled: no id", node->uname); + if (crm_xml_add(node_state, XML_ATTR_ID, crm_peer_uuid(node)) == NULL) { + crm_info("Node update for %s cancelled: no ID", node->uname); free_xml(node_state); return NULL; } @@ -149,17 +147,31 @@ create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, crm_xml_add(node_state, XML_ATTR_UNAME, node->uname); if ((flags & node_update_cluster) && node->state) { - pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER, - pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); + if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { + // A value 0 means the node is not a cluster member. 
+ crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member); + + } else { + pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM, + pcmk__str_eq(node->state, CRM_NODE_MEMBER, + pcmk__str_casei)); + } } if (!pcmk_is_set(node->flags, crm_remote_node)) { if (flags & node_update_peer) { - value = OFFLINESTATUS; - if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { - value = ONLINESTATUS; + if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { + // A value 0 means the peer is offline in CPG. + crm_xml_add_ll(node_state, PCMK__XA_CRMD, node->when_online); + + } else { + // @COMPAT DCs < 2.1.7 use online/offline rather than timestamp + value = OFFLINESTATUS; + if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { + value = ONLINESTATUS; + } + crm_xml_add(node_state, PCMK__XA_CRMD, value); } - crm_xml_add(node_state, XML_NODE_IS_PEER, value); } if (flags & node_update_join) { @@ -168,11 +180,11 @@ create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, } else { value = CRMD_JOINSTATE_MEMBER; } - crm_xml_add(node_state, XML_NODE_JOIN_STATE, value); + crm_xml_add(node_state, PCMK__XA_JOIN, value); } if (flags & node_update_expected) { - crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected); + crm_xml_add(node_state, PCMK__XA_EXPECTED, node->expected); } } @@ -210,7 +222,7 @@ search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, return; } - if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) { + if (pcmk__xe_is(output, XML_CIB_TAG_NODE)) { node_xml = output; } else { @@ -224,7 +236,7 @@ search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, crm_node_t *node = NULL; gboolean known = FALSE; - if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) { + if (!pcmk__xe_is(node_xml, XML_CIB_TAG_NODE)) { continue; } diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 54b27ec..39f3c7a 100644 --- 
a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -328,52 +328,80 @@ route_message(enum crmd_fsa_cause cause, xmlNode * input) gboolean relay_message(xmlNode * msg, gboolean originated_locally) { - int dest = 1; + enum crm_ais_msg_types dest = crm_msg_ais; bool is_for_dc = false; bool is_for_dcib = false; bool is_for_te = false; bool is_for_crm = false; bool is_for_cib = false; bool is_local = false; - const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); - const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); - const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); - const char *type = crm_element_value(msg, F_TYPE); - const char *task = crm_element_value(msg, F_CRM_TASK); - const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); + bool broadcast = false; + const char *host_to = NULL; + const char *sys_to = NULL; + const char *sys_from = NULL; + const char *type = NULL; + const char *task = NULL; + const char *ref = NULL; + crm_node_t *node_to = NULL; + + CRM_CHECK(msg != NULL, return TRUE); + + host_to = crm_element_value(msg, F_CRM_HOST_TO); + sys_to = crm_element_value(msg, F_CRM_SYS_TO); + sys_from = crm_element_value(msg, F_CRM_SYS_FROM); + type = crm_element_value(msg, F_TYPE); + task = crm_element_value(msg, F_CRM_TASK); + ref = crm_element_value(msg, XML_ATTR_REFERENCE); + + broadcast = pcmk__str_empty(host_to); if (ref == NULL) { ref = "without reference ID"; } - if (msg == NULL) { - crm_warn("Cannot route empty message"); - return TRUE; - - } else if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { - crm_trace("No routing needed for hello message %s", ref); + if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { + crm_trace("Received hello %s from %s (no processing needed)", + ref, pcmk__s(sys_from, "unidentified source")); + crm_log_xml_trace(msg, "hello"); return TRUE; + } - } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) { - crm_warn("Received invalid message %s: type '%s' 
not '" T_CRM "'", + // Require message type (set by create_request()) + if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) { + crm_warn("Ignoring invalid message %s with type '%s' (not '" T_CRM "')", ref, pcmk__s(type, "")); - crm_log_xml_warn(msg, "[bad message type]"); + crm_log_xml_trace(msg, "ignored"); return TRUE; + } - } else if (sys_to == NULL) { - crm_warn("Received invalid message %s: no subsystem", ref); - crm_log_xml_warn(msg, "[no subsystem]"); + // Require a destination subsystem (also set by create_request()) + if (sys_to == NULL) { + crm_warn("Ignoring invalid message %s with no " F_CRM_SYS_TO, ref); + crm_log_xml_trace(msg, "ignored"); return TRUE; } + // Get the message type appropriate to the destination subsystem + if (is_corosync_cluster()) { + dest = text2msg_type(sys_to); + if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) { + /* Unrecognized value, use a sane default + * + * @TODO Maybe we should bail instead + */ + dest = crm_msg_crmd; + } + } + is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); + // Check whether message should be processed locally is_local = false; - if (pcmk__str_empty(host_to)) { + if (broadcast) { if (is_for_dc || is_for_te) { is_local = false; @@ -397,6 +425,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) } else if (pcmk__str_eq(controld_globals.our_nodename, host_to, pcmk__str_casei)) { is_local = true; + } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA); const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); @@ -407,69 +436,68 @@ relay_message(xmlNode * msg, gboolean originated_locally) } } - if (is_for_dc || is_for_dcib || is_for_te) { - if (AM_I_DC && is_for_te) { - 
crm_trace("Route message %s locally as transition request", ref); - send_msg_via_ipc(msg, sys_to); + // Check whether message should be relayed - } else if (AM_I_DC) { + if (is_for_dc || is_for_dcib || is_for_te) { + if (AM_I_DC) { + if (is_for_te) { + crm_trace("Route message %s locally as transition request", + ref); + crm_log_xml_trace(msg, sys_to); + send_msg_via_ipc(msg, sys_to); + return TRUE; // No further processing of message is needed + } crm_trace("Route message %s locally as DC request", ref); return FALSE; // More to be done by caller + } - } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, - CRM_SYSTEM_TENGINE, NULL)) { - - if (is_corosync_cluster()) { - dest = text2msg_type(sys_to); + if (originated_locally + && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, + CRM_SYSTEM_TENGINE, NULL)) { + crm_trace("Relay message %s to DC (via %s)", + ref, pcmk__s(host_to, "broadcast")); + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { + node_to = crm_get_peer(0, host_to); } - crm_trace("Relay message %s to DC", ref); - send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); - - } else { - /* Neither the TE nor the scheduler should be sending messages - * to DCs on other nodes. By definition, if we are no longer the DC, - * then the scheduler's or TE's data should be discarded. - */ - crm_trace("Discard message %s because we are not DC", ref); + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; } - } else if (is_local && (is_for_crm || is_for_cib)) { - crm_trace("Route message %s locally as controller request", ref); - return FALSE; // More to be done by caller - - } else if (is_local) { - crm_trace("Relay message %s locally to %s", - ref, (sys_to? 
sys_to : "unknown client")); - crm_log_xml_trace(msg, "[IPC relay]"); - send_msg_via_ipc(msg, sys_to); - - } else { - crm_node_t *node_to = NULL; - - if (is_corosync_cluster()) { - dest = text2msg_type(sys_to); + /* Transition engine and scheduler messages are sent only to the DC on + * the same node. If we are no longer the DC, discard this message. + */ + crm_trace("Ignoring message %s because we are no longer DC", ref); + crm_log_xml_trace(msg, "ignored"); + return TRUE; // No further processing of message is needed + } - if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { - dest = crm_msg_crmd; - } + if (is_local) { + if (is_for_crm || is_for_cib) { + crm_trace("Route message %s locally as controller request", ref); + return FALSE; // More to be done by caller } + crm_trace("Relay message %s locally to %s", ref, sys_to); + crm_log_xml_trace(msg, "IPC-relay"); + send_msg_via_ipc(msg, sys_to); + return TRUE; + } - if (host_to) { - node_to = pcmk__search_cluster_node_cache(0, host_to); - if (node_to == NULL) { - crm_warn("Cannot route message %s: Unknown node %s", - ref, host_to); - return TRUE; - } - crm_trace("Relay message %s to %s", - ref, (node_to->uname? node_to->uname : "peer")); - } else { - crm_trace("Broadcast message %s to all peers", ref); + if (!broadcast) { + node_to = pcmk__search_cluster_node_cache(0, host_to, NULL); + if (node_to == NULL) { + crm_warn("Ignoring message %s because node %s is unknown", + ref, host_to); + crm_log_xml_trace(msg, "ignored"); + return TRUE; } - send_cluster_message(host_to ? 
node_to : NULL, dest, msg, TRUE); } - return TRUE; // No further processing of message is needed + crm_trace("Relay message %s to %s", + ref, pcmk__s(host_to, "all peers")); + crm_log_xml_trace(msg, "relayed"); + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; } // Return true if field contains a positive integer @@ -546,6 +574,7 @@ controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_c } crm_trace("Validated IPC hello from client %s", client_name); + crm_log_xml_trace(client_msg, "hello"); if (curr_client) { curr_client->userdata = strdup(client_name); } @@ -553,6 +582,7 @@ controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_c return false; rejected: + crm_log_xml_trace(client_msg, "rejected"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } @@ -575,7 +605,9 @@ handle_message(xmlNode *msg, enum crmd_fsa_cause cause) return I_NULL; } - crm_err("Unknown message type: %s", type); + crm_warn("Ignoring message with unknown " F_CRM_MSG_TYPE " '%s'", + pcmk__s(type, "")); + crm_log_xml_trace(msg, "bad"); return I_NULL; } @@ -701,7 +733,7 @@ handle_lrm_delete(xmlNode *stored_msg) crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "local node"), rsc_id, ((rc == pcmk_rc_ok)? "" : " not")); - op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0); + op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0); op->type = lrmd_event_exec_complete; op->user_data = strdup(transition? 
transition : FAKE_TE_ID); op->params = pcmk__strkey_table(free, free); @@ -732,7 +764,7 @@ handle_remote_state(const xmlNode *msg) bool remote_is_up = false; int rc = pcmk_rc_ok; - rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up); + rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up); CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL); @@ -818,7 +850,7 @@ handle_node_list(const xmlNode *request) crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t crm_xml_add(xml, XML_ATTR_UNAME, node->uname); - crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state); + crm_xml_add(xml, PCMK__XA_IN_CCM, node->state); } // Create and send reply @@ -875,7 +907,7 @@ handle_node_info_request(const xmlNode *msg) if (node) { crm_xml_add(reply_data, XML_ATTR_ID, node->uuid); crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname); - crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state); + crm_xml_add(reply_data, PCMK__XA_CRMD, node->state); pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE, pcmk_is_set(node->flags, crm_remote_node)); } @@ -988,14 +1020,15 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) /* Optimize this for the DC - it has the most to do */ + crm_log_xml_trace(stored_msg, "request"); if (op == NULL) { - crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]"); + crm_warn("Ignoring request without " F_CRM_TASK); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); - crm_node_t *node = pcmk__search_cluster_node_cache(0, from); + crm_node_t *node = pcmk__search_cluster_node_cache(0, from, NULL); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { @@ -1062,11 +1095,6 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) if (controld_globals.fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; -#if 0 - } else if (AM_I_DC) { - /* This is the old 
way of doing things but what is gained? */ - return I_ELECTION; -#endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { @@ -1157,8 +1185,9 @@ handle_response(xmlNode *stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); + crm_log_xml_trace(stored_msg, "reply"); if (op == NULL) { - crm_log_xml_err(stored_msg, "Bad message"); + crm_warn("Ignoring reply without " F_CRM_TASK); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { // Check whether scheduler answer been superseded by subsequent request @@ -1295,7 +1324,7 @@ broadcast_remote_state_message(const char *node_name, bool node_up) node_name, node_up? "coming up" : "going down"); crm_xml_add(msg, XML_ATTR_ID, node_name); - pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up); + pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up); if (node_up) { crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename); diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c index 240a978..c813ceb 100644 --- a/daemons/controld/controld_metadata.c +++ b/daemons/controld/controld_metadata.c @@ -1,5 +1,5 @@ /* - * Copyright 2017-2022 the Pacemaker project contributors + * Copyright 2017-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -172,7 +172,7 @@ controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc, const char *action_name = crm_element_value(match, "name"); - if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT, + if (pcmk__str_eq(action_name, PCMK_ACTION_RELOAD_AGENT, pcmk__str_none)) { if (ocf1_1) { controld_set_ra_flags(md, key, ra_supports_reload_agent); @@ -181,7 +181,7 @@ controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc, "because it does not support OCF 1.1 or later", key); } - } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD, + } else if (!ocf1_1 && pcmk__str_eq(action_name, PCMK_ACTION_RELOAD, pcmk__str_casei)) { controld_set_ra_flags(md, key, ra_supports_legacy_reload); } diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index f24b755..d692ef6 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -280,6 +280,7 @@ remote_node_up(const char *node_name) int call_opt; xmlNode *update, *state; crm_node_t *node; + lrm_state_t *connection_rsc = NULL; CRM_CHECK(node_name != NULL, return); crm_info("Announcing Pacemaker Remote node %s", node_name); @@ -301,6 +302,20 @@ remote_node_up(const char *node_name) purge_remote_node_attrs(call_opt, node); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + /* Apply any start state that we were given from the environment on the + * remote node. + */ + connection_rsc = lrm_state_find(node->uname); + + if (connection_rsc != NULL) { + lrmd_t *lrm = connection_rsc->conn; + const char *start_state = lrmd__node_start_state(lrm); + + if (start_state) { + set_join_state(start_state, node->uname, node->uuid, true); + } + } + /* pacemaker_remote nodes don't participate in the membership layer, * so cluster nodes don't automatically get notified when they come and go. 
* We send a cluster message to the DC, and update the CIB node state entry, @@ -392,10 +407,11 @@ check_remote_node_state(const remote_ra_cmd_t *cmd) return; } - if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { remote_node_up(cmd->rsc_id); - } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM, + pcmk__str_casei)) { /* After a successful migration, we don't need to do remote_node_up() * because the DC already knows the node is up, and we don't want to * clear LRM history etc. We do need to add the remote node to this @@ -408,7 +424,7 @@ check_remote_node_state(const remote_ra_cmd_t *cmd) CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); - } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; @@ -510,7 +526,8 @@ retry_start_cmd_cb(gpointer data) return FALSE; } cmd = ra_data->cur_cmd; - if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) { + if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, + PCMK_ACTION_MIGRATE_FROM, NULL)) { return FALSE; } update_remaining_timeout(cmd); @@ -681,7 +698,8 @@ remote_lrm_op_callback(lrmd_event_data_t * op) handle_remote_ra_stop(lrm_state, NULL); remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM); /* now fake the reply of a successful 'stop' */ - synthesize_lrmd_success(NULL, lrm_state->node_name, "stop"); + synthesize_lrmd_success(NULL, lrm_state->node_name, + PCMK_ACTION_STOP); } return; } @@ -695,8 +713,9 @@ remote_lrm_op_callback(lrmd_event_data_t * op) /* Start actions and migrate from actions complete after connection * comes back to us. 
*/ - if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start", - "migrate_from", NULL)) { + if ((op->type == lrmd_event_connect) + && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, + PCMK_ACTION_MIGRATE_FROM, NULL)) { if (op->connection_rc < 0) { update_remaining_timeout(cmd); @@ -731,7 +750,9 @@ remote_lrm_op_callback(lrmd_event_data_t * op) report_remote_ra_result(cmd); cmd_handled = TRUE; - } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + } else if ((op->type == lrmd_event_poke) + && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); @@ -758,7 +779,9 @@ remote_lrm_op_callback(lrmd_event_data_t * op) } cmd_handled = TRUE; - } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + } else if ((op->type == lrmd_event_disconnect) + && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { if (pcmk_is_set(ra_data->status, remote_active) && !pcmk_is_set(cmd->status, cmd_cancel)) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, @@ -771,7 +794,9 @@ remote_lrm_op_callback(lrmd_event_data_t * op) } cmd_handled = TRUE; - } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + } else if ((op->type == lrmd_event_new_client) + && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, + pcmk__str_casei)) { handle_remote_ra_stop(lrm_state, cmd); cmd_handled = TRUE; @@ -882,7 +907,8 @@ handle_remote_ra_exec(gpointer user_data) ra_data->cmds = g_list_remove_link(ra_data->cmds, first); g_list_free_1(first); - if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { + if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START, + PCMK_ACTION_MIGRATE_FROM, NULL)) { lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete); if (handle_remote_ra_start(lrm_state, cmd, cmd->timeout) == 
pcmk_rc_ok) { @@ -894,7 +920,7 @@ handle_remote_ra_exec(gpointer user_data) } report_remote_ra_result(cmd); - } else if (!strcmp(cmd->action, "monitor")) { + } else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) { if (lrm_state_is_connected(lrm_state) == TRUE) { rc = lrm_state_poke_connection(lrm_state); @@ -917,7 +943,7 @@ handle_remote_ra_exec(gpointer user_data) } report_remote_ra_result(cmd); - } else if (!strcmp(cmd->action, "stop")) { + } else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) { if (pcmk_is_set(ra_data->status, expect_takeover)) { /* briefly wait on stop for the takeover event to occur. If the @@ -933,13 +959,14 @@ handle_remote_ra_exec(gpointer user_data) handle_remote_ra_stop(lrm_state, cmd); - } else if (!strcmp(cmd->action, "migrate_to")) { + } else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) { lrm_remote_clear_flags(lrm_state, takeover_complete); lrm_remote_set_flags(lrm_state, expect_takeover); pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); - } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD, - CRMD_ACTION_RELOAD_AGENT, NULL)) { + + } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD, + PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Currently the only reloadable parameter is reconnect_interval, * which is only used by the scheduler via the CIB, so reloads are a * no-op. 
@@ -1029,13 +1056,13 @@ static gboolean is_remote_ra_supported_action(const char *action) { return pcmk__str_any_of(action, - CRMD_ACTION_START, - CRMD_ACTION_STOP, - CRMD_ACTION_STATUS, - CRMD_ACTION_MIGRATE, - CRMD_ACTION_MIGRATED, - CRMD_ACTION_RELOAD_AGENT, - CRMD_ACTION_RELOAD, + PCMK_ACTION_START, + PCMK_ACTION_STOP, + PCMK_ACTION_MONITOR, + PCMK_ACTION_MIGRATE_TO, + PCMK_ACTION_MIGRATE_FROM, + PCMK_ACTION_RELOAD_AGENT, + PCMK_ACTION_RELOAD, NULL); } @@ -1048,7 +1075,9 @@ fail_all_monitor_cmds(GList * list) for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; - if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + if ((cmd->interval_ms > 0) + && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { rm_list = g_list_append(rm_list, cmd); } } @@ -1137,8 +1166,9 @@ handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, if (ra_data->cur_cmd && !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) && - (ra_data->cur_cmd->interval_ms == interval_ms) && - pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) { + (ra_data->cur_cmd->interval_ms == interval_ms) + && pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { cmd = ra_data->cur_cmd; goto handle_dup; @@ -1147,7 +1177,8 @@ handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) - && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { goto handle_dup; } } @@ -1155,7 +1186,8 @@ handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) - && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + 
pcmk__str_casei)) { goto handle_dup; } } @@ -1165,7 +1197,7 @@ handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, handle_dup: crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT, - cmd->rsc_id, "monitor", interval_ms); + cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms); /* update the userdata */ if (userdata) { @@ -1385,7 +1417,7 @@ remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance) } #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ - "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \ + "[@" XML_LRM_ATTR_TASK "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \ XML_GRAPH_TAG_MAINTENANCE /*! @@ -1403,9 +1435,10 @@ remote_ra_process_maintenance_nodes(xmlNode *xml) xmlNode *node; int cnt = 0, cnt_remote = 0; - for (node = - first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE); - node != NULL; node = pcmk__xml_next(node)) { + for (node = first_named_child(getXpathResult(search, 0), + XML_CIB_TAG_NODE); + node != NULL; node = crm_next_same_xml(node)) { + lrm_state_t *lrm_state = lrm_state_find(ID(node)); cnt++; diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 912f9a5..8aca83f 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -45,11 +45,11 @@ controld_shutdown_schedulerd_ipc(void) * \internal * \brief Save CIB query result to file, raising FSA error * - * \param[in] msg Ignored - * \param[in] call_id Call ID of CIB query - * \param[in] rc Return code of CIB query - * \param[in,out] output Result of CIB query - * \param[in] user_data Unique identifier for filename + * \param[in] msg Ignored + * \param[in] call_id Call ID of CIB query + * \param[in] rc Return code of CIB query + * \param[in] output Result of CIB query + * \param[in] user_data Unique identifier for filename * * \note This is intended to be called after a scheduler connection fails. 
*/ @@ -90,8 +90,9 @@ handle_disconnect(void) int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); - crm_crit("Connection to the scheduler failed " - CRM_XS " uuid=%s", uuid_str); + crm_crit("Lost connection to the scheduler " + CRM_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2", + uuid_str); /* * The scheduler died... @@ -107,9 +108,6 @@ handle_disconnect(void) NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, uuid_str, save_cib_contents); - - } else { - crm_info("Connection to the scheduler released"); } controld_clear_fsa_input_flags(R_PE_CONNECTED); @@ -199,9 +197,10 @@ new_schedulerd_ipc_connection(void) pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); - rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main); + rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3); if (rc != pcmk_rc_ok) { - crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + crm_err("Error connecting to %s: %s", + pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc)); return false; } diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c index d8cfcad..fe6b744 100644 --- a/daemons/controld/controld_te_actions.c +++ b/daemons/controld/controld_te_actions.c @@ -47,7 +47,7 @@ execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo) const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); /* send to peers as well? 
*/ - if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { + if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) { GHashTableIter iter; crm_node_t *node = NULL; @@ -125,7 +125,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router_node == NULL) { router_node = on_node; - if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) { @@ -148,7 +148,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) id, task, on_node, (is_local? " locally" : ""), (no_wait? " without waiting" : "")); - if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + if (is_local + && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* defer until everything else completes */ crm_info("Controller request '%s' is a local shutdown", id); graph->completion_action = pcmk__graph_shutdown; @@ -156,7 +157,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) te_action_confirmed(action, graph); return pcmk_rc_ok; - } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { crm_node_t *peer = crm_get_peer(0, router_node); pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); @@ -318,7 +319,7 @@ controld_record_action_timeout(pcmk__graph_action_t *action) int target_rc = get_target_rc(action); crm_warn("%s %d: %s on %s timed out", - crm_element_name(action->xml), action->id, task_uuid, target); + action->xml->name, action->id, task_uuid, target); op = synthesize_timeout_event(action, target_rc); controld_record_action_event(action, op); @@ -528,9 +529,9 @@ te_update_job_count(pcmk__graph_action_t 
*action, int offset) * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); - if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, - CRMD_ACTION_MIGRATED, NULL)) { - + if ((target == NULL) + && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, + PCMK_ACTION_MIGRATE_FROM, NULL)) { const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); @@ -586,7 +587,8 @@ allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action, return false; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { - if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { + if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, + PCMK_ACTION_MIGRATE_FROM, NULL)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return false; @@ -624,8 +626,9 @@ graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action) * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); - if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, - CRMD_ACTION_MIGRATED, NULL)) { + if ((target == NULL) + && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, + PCMK_ACTION_MIGRATE_FROM, NULL)) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); if (!allowed_on_node(graph, action, target)) { return false; diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index cf9de83..c26e757 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -225,12 +225,12 @@ process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, return; } - if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) { + if (pcmk__xe_is(xml, XML_CIB_TAG_LRM)) { xml = 
first_named_child(xml, XML_LRM_TAG_RESOURCES); CRM_CHECK(xml != NULL, return); } - CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return); + CRM_CHECK(pcmk__xe_is(xml, XML_LRM_TAG_RESOURCES), return); /* * Updates by, or in response to, TE actions will never contain updates @@ -558,7 +558,7 @@ te_update_diff(const char *event, xmlNode * msg) p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(controld_globals.fsa_state)); - crm_element_value_int(diff, "format", &format); + crm_element_value_int(diff, PCMK_XA_FORMAT, &format); switch (format) { case 1: te_update_diff_v1(event, diff); diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c index d4e2b0f..28977c0 100644 --- a/daemons/controld/controld_te_events.c +++ b/daemons/controld/controld_te_events.c @@ -111,7 +111,7 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) } else if (action->type == pcmk__cluster_graph_action) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { + if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { continue; } } @@ -196,16 +196,16 @@ update_failcount(const xmlNode *event, const char *event_node_uuid, int rc, /* Decide whether update is necessary and what value to use */ if ((interval_ms > 0) - || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none) - || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) { + || pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none) + || pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { do_update = TRUE; - } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) { + } else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) { do_update = TRUE; value = pcmk__s(controld_globals.transition_graph->failed_start_offset, CRM_INFINITY_S); - } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) { + } else if (pcmk__str_eq(task, 
PCMK_ACTION_STOP, pcmk__str_none)) { do_update = TRUE; value = pcmk__s(controld_globals.transition_graph->failed_stop_offset, CRM_INFINITY_S); @@ -314,7 +314,7 @@ get_cancel_action(const char *id, const char *node) pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) { + if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) { continue; } diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c index ecbc0b2..5a9f029 100644 --- a/daemons/controld/controld_te_utils.c +++ b/daemons/controld/controld_te_utils.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -17,6 +17,8 @@ //! Triggers transition graph processing static crm_trigger_t *transition_trigger = NULL; +static GHashTable *node_pending_timers = NULL; + gboolean stop_te_timer(pcmk__graph_action_t *action) { @@ -132,11 +134,13 @@ static struct abort_timer_s { static gboolean abort_timer_popped(gpointer data) { - if (AM_I_DC && (abort_timer.aborted == FALSE)) { - abort_transition(abort_timer.priority, abort_timer.action, - abort_timer.text, NULL); + struct abort_timer_s *abort_timer = (struct abort_timer_s *) data; + + if (AM_I_DC && (abort_timer->aborted == FALSE)) { + abort_transition(abort_timer->priority, abort_timer->action, + abort_timer->text, NULL); } - abort_timer.id = 0; + abort_timer->id = 0; return FALSE; // do not immediately reschedule timer } @@ -158,7 +162,143 @@ abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, abort_timer.priority = abort_priority; abort_timer.action = abort_action; abort_timer.text = abort_text; - abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); + abort_timer.id = g_timeout_add(delay_ms, 
abort_timer_popped, &abort_timer); +} + +static void +free_node_pending_timer(gpointer data) +{ + struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data; + + if (node_pending_timer->id != 0) { + g_source_remove(node_pending_timer->id); + node_pending_timer->id = 0; + } + + free(node_pending_timer); +} + +static gboolean +node_pending_timer_popped(gpointer key) +{ + struct abort_timer_s *node_pending_timer = NULL; + + if (node_pending_timers == NULL) { + return FALSE; + } + + node_pending_timer = g_hash_table_lookup(node_pending_timers, key); + if (node_pending_timer == NULL) { + return FALSE; + } + + crm_warn("Node with id '%s' pending timed out (%us) on joining the process " + "group", + (const char *) key, controld_globals.node_pending_timeout); + + if (controld_globals.node_pending_timeout > 0) { + abort_timer_popped(node_pending_timer); + } + + g_hash_table_remove(node_pending_timers, key); + + return FALSE; // do not reschedule timer +} + +static void +init_node_pending_timer(const crm_node_t *node, guint timeout) +{ + struct abort_timer_s *node_pending_timer = NULL; + char *key = NULL; + + if (node->uuid == NULL) { + return; + } + + if (node_pending_timers == NULL) { + node_pending_timers = pcmk__strikey_table(free, + free_node_pending_timer); + + // The timer is somehow already existing + } else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) { + return; + } + + crm_notice("Waiting for pending %s with id '%s' to join the process " + "group (timeout=%us)", + node->uname ? 
node->uname : "node", node->uuid, + controld_globals.node_pending_timeout); + + node_pending_timer = calloc(1, sizeof(struct abort_timer_s)); + CRM_ASSERT(node_pending_timer != NULL); + + node_pending_timer->aborted = FALSE; + node_pending_timer->priority = INFINITY; + node_pending_timer->action = pcmk__graph_restart; + node_pending_timer->text = "Node pending timed out"; + + key = strdup(node->uuid); + CRM_ASSERT(key != NULL); + + g_hash_table_replace(node_pending_timers, key, node_pending_timer); + + node_pending_timer->id = g_timeout_add_seconds(timeout, + node_pending_timer_popped, + key); + CRM_ASSERT(node_pending_timer->id != 0); +} + +static void +remove_node_pending_timer(const char *node_uuid) +{ + if (node_pending_timers == NULL) { + return; + } + + g_hash_table_remove(node_pending_timers, node_uuid); +} + +void +controld_node_pending_timer(const crm_node_t *node) +{ + long long remaining_timeout = 0; + + /* If the node is not an active cluster node, is leaving the cluster, or is + * already part of CPG, or node-pending-timeout is disabled, free any + * node pending timer for it. + */ + if (pcmk_is_set(node->flags, crm_remote_node) + || (node->when_member <= 1) || (node->when_online > 0) + || (controld_globals.node_pending_timeout == 0)) { + remove_node_pending_timer(node->uuid); + return; + } + + // Node is a cluster member but offline in CPG + + remaining_timeout = node->when_member - time(NULL) + + controld_globals.node_pending_timeout; + + /* It already passed node pending timeout somehow. + * Free any node pending timer of it. 
+ */ + if (remaining_timeout <= 0) { + remove_node_pending_timer(node->uuid); + return; + } + + init_node_pending_timer(node, remaining_timeout); +} + +void +controld_free_node_pending_timers(void) +{ + if (node_pending_timers == NULL) { + return; + } + + g_hash_table_destroy(node_pending_timers); + node_pending_timers = NULL; } static const char * @@ -246,7 +386,7 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, const xmlNode *search = NULL; for(search = reason; search; search = search->parent) { - if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) { + if (pcmk__xe_is(search, XML_TAG_DIFF)) { diff = search; break; } @@ -255,7 +395,7 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, if(diff) { xml_patch_versions(diff, add, del); for(search = reason; search; search = search->parent) { - if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) { + if (pcmk__xe_is(search, XML_DIFF_CHANGE)) { change = search; break; } @@ -276,14 +416,13 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, do_crm_log(level, "Transition %d aborted by %s.%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", - controld_globals.transition_graph->id, TYPE(reason), + controld_globals.transition_graph->id, reason->name, ID(reason), abort_text, add[0], add[1], add[2], fn, line, (const char *) local_path->str, pcmk__btoa(controld_globals.transition_graph->complete)); g_string_free(local_path, TRUE); } else { - const char *kind = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *path = crm_element_value(change, XML_DIFF_PATH); @@ -297,9 +436,9 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, reason = reason->children; } } + CRM_CHECK(reason != NULL, goto done); } - kind = TYPE(reason); if(strcmp(op, "delete") == 0) { const char *shortpath = strrchr(path, '/'); @@ -310,7 +449,7 @@ abort_transition_graph(int 
abort_priority, enum pcmk__graph_next abort_action, add[0], add[1], add[2], fn, line, path, pcmk__btoa(controld_globals.transition_graph->complete)); - } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_none)) { + } else if (pcmk__xe_is(reason, XML_CIB_TAG_NVPAIR)) { do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", controld_globals.transition_graph->id, @@ -320,7 +459,7 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, abort_text, add[0], add[1], add[2], fn, line, path, pcmk__btoa(controld_globals.transition_graph->complete)); - } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_none)) { + } else if (pcmk__xe_is(reason, XML_LRM_TAG_RSC_OP)) { const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s " @@ -331,14 +470,15 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, magic, add[0], add[1], add[2], fn, line, pcmk__btoa(controld_globals.transition_graph->complete)); - } else if (pcmk__str_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) { + } else if (pcmk__str_any_of((const char *) reason->name, + XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) { const char *uname = crm_peer_uname(ID(reason)); do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", controld_globals.transition_graph->id, - kind, op, (uname? 
uname : ID(reason)), abort_text, - add[0], add[1], add[2], fn, line, + reason->name, op, pcmk__s(uname, ID(reason)), + abort_text, add[0], add[1], add[2], fn, line, pcmk__btoa(controld_globals.transition_graph->complete)); } else { @@ -347,12 +487,13 @@ abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", controld_globals.transition_graph->id, - TYPE(reason), (id? id : ""), (op? op : "change"), + reason->name, pcmk__s(id, ""), pcmk__s(op, "change"), abort_text, add[0], add[1], add[2], fn, line, path, pcmk__btoa(controld_globals.transition_graph->complete)); } } +done: if (controld_globals.transition_graph->complete) { if (controld_get_period_transition_timer() > 0) { controld_stop_transition_timer(); diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c index 5b7f9c0..a4775e5 100644 --- a/daemons/controld/controld_throttle.c +++ b/daemons/controld/controld_throttle.c @@ -154,7 +154,7 @@ throttle_cib_load(float *load) if(stream == NULL) { int rc = errno; - crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); free(loadfile); loadfile = NULL; return FALSE; } @@ -220,7 +220,7 @@ throttle_load_avg(float *load) stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; - crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); + crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); return FALSE; } @@ -407,7 +407,7 @@ static void throttle_update_job_max(const char *preference) { long long max = 0LL; - const char *env_limit = getenv("PCMK_node_action_limit"); + const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT); if (env_limit != NULL) { preference = env_limit; // Per-node override diff --git a/daemons/controld/controld_transition.c 
b/daemons/controld/controld_transition.c index c8a342c..897c6d3 100644 --- a/daemons/controld/controld_transition.c +++ b/daemons/controld/controld_transition.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -15,11 +15,6 @@ #include <pacemaker-controld.h> -static void -global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output) -{ -} - static pcmk__graph_t * create_blank_graph(void) { @@ -82,12 +77,6 @@ do_te_control(long long action, crm_err("Could not set CIB notification callback"); init_ok = FALSE; } - - if (cib_conn->cmds->set_op_callback(cib_conn, - global_cib_callback) != pcmk_ok) { - crm_err("Could not set CIB global callback"); - init_ok = FALSE; - } } if (init_ok) { diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h index 2da4221..0655bd9 100644 --- a/daemons/controld/controld_transition.h +++ b/daemons/controld/controld_transition.h @@ -48,6 +48,8 @@ void controld_destroy_transition_trigger(void); void controld_trigger_graph_as(const char *fn, int line); void abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action, const char *abort_text, guint delay_ms); +void controld_node_pending_timer(const crm_node_t *node); +void controld_free_node_pending_timers(void); void abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action, const char *abort_text, const xmlNode *reason, diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c index 4ce09d9..9b306ee 100644 --- a/daemons/controld/controld_utils.c +++ b/daemons/controld/controld_utils.c @@ -828,7 +828,7 @@ get_node_id(xmlNode *lrm_rsc_op) { xmlNode *node = lrm_rsc_op; - while (node != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(node), pcmk__str_casei)) { + while ((node != NULL) && !pcmk__xe_is(node, XML_CIB_TAG_STATE)) { 
node = node->parent; } diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c index 5858898..e4a72c2 100644 --- a/daemons/controld/pacemaker-controld.c +++ b/daemons/controld/pacemaker-controld.c @@ -112,7 +112,7 @@ main(int argc, char **argv) goto done; } - if (crm_ipc_connect(old_instance)) { + if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h index 1484a00..2334cce 100644 --- a/daemons/controld/pacemaker-controld.h +++ b/daemons/controld/pacemaker-controld.h @@ -36,4 +36,7 @@ void controld_remove_voter(const char *uname); void controld_election_fini(void); void controld_stop_current_election_timeout(void); +void set_join_state(const char *start_state, const char *node_name, + const char *node_uuid, bool remote); + #endif diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am index 466f0df..ab8544f 100644 --- a/daemons/execd/Makefile.am +++ b/daemons/execd/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2012-2021 the Pacemaker project contributors +# Copyright 2012-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
# @@ -12,18 +12,20 @@ include $(top_srcdir)/mk/man.mk halibdir = $(CRM_DAEMON_DIR) -halib_PROGRAMS = pacemaker-execd cts-exec-helper +halib_PROGRAMS = pacemaker-execd \ + cts-exec-helper EXTRA_DIST = pacemaker-remoted.8.inc pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/fencing/libstonithd.la -pacemaker_execd_SOURCES = pacemaker-execd.c execd_commands.c \ - execd_alerts.c +pacemaker_execd_LDADD = $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_execd_LDADD += $(top_builddir)/lib/services/libcrmservice.la +pacemaker_execd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_execd_SOURCES = pacemaker-execd.c \ + execd_commands.c \ + execd_alerts.c if BUILD_REMOTE sbin_PROGRAMS = pacemaker-remoted @@ -34,22 +36,27 @@ initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote endif -pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE $(AM_CPPFLAGS) +pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE \ + $(AM_CPPFLAGS) pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \ - $(top_builddir)/lib/lrmd/liblrmd.la -pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ - remoted_tls.c remoted_pidone.c remoted_proxy.c +pacemaker_remoted_LDADD = $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_remoted_LDADD += $(top_builddir)/lib/services/libcrmservice.la +pacemaker_remoted_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la +pacemaker_remoted_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ + remoted_tls.c \ + remoted_pidone.c \ + remoted_proxy.c endif -cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/cib/libcib.la \ - 
$(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/pengine/libpe_status.la +cts_exec_helper_LDADD = $(top_builddir)/lib/pengine/libpe_status.la +cts_exec_helper_LDADD += $(top_builddir)/lib/cib/libcib.la +cts_exec_helper_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la +cts_exec_helper_LDADD += $(top_builddir)/lib/services/libcrmservice.la +cts_exec_helper_LDADD += $(top_builddir)/lib/common/libcrmcommon.la cts_exec_helper_SOURCES = cts-exec-helper.c noinst_HEADERS = pacemaker-execd.h @@ -59,6 +66,7 @@ CLEANFILES = $(man8_MANS) # Always create a symlink for the old pacemaker_remoted name, so that bundle # container images using a current Pacemaker will run on cluster nodes running # Pacemaker 1 (>=1.1.17). +.PHONY: install-exec-hook install-exec-hook: if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd && $(LN_S) pacemaker-execd lrmd @@ -67,6 +75,7 @@ if BUILD_REMOTE cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted && $(LN_S) pacemaker-remoted pacemaker_remoted endif +.PHONY: uninstall-hook uninstall-hook: if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index 2af5e16..6ebbedf 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -443,9 +443,9 @@ static int generate_params(void) { int rc = pcmk_rc_ok; - pe_working_set_t *data_set = NULL; + pcmk_scheduler_t *scheduler = NULL; xmlNode *cib_xml_copy = NULL; - pe_resource_t *rsc = NULL; + pcmk_resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; @@ -467,27 +467,29 @@ generate_params(void) } // Calculate cluster status - data_set = pe_new_working_set(); - if (data_set == NULL) { - crm_crit("Could not allocate working set"); + scheduler = pe_new_working_set(); + if (scheduler == NULL) { + crm_crit("Could not allocate scheduler data"); return ENOMEM; } - pe__set_working_set_flags(data_set, 
pe_flag_no_counts|pe_flag_no_compat); - data_set->input = cib_xml_copy; - data_set->now = crm_time_new(NULL); - cluster_status(data_set); + pe__set_working_set_flags(scheduler, + pcmk_sched_no_counts|pcmk_sched_no_compat); + scheduler->input = cib_xml_copy; + scheduler->now = crm_time_new(NULL); + cluster_status(scheduler); // Find resource in CIB - rsc = pe_find_resource_with_flags(data_set->resources, options.rsc_id, - pe_find_renamed|pe_find_any); + rsc = pe_find_resource_with_flags(scheduler->resources, options.rsc_id, + pcmk_rsc_match_history + |pcmk_rsc_match_basename); if (rsc == NULL) { crm_err("Resource does not exist in config"); - pe_free_working_set(data_set); + pe_free_working_set(scheduler); return EINVAL; } // Add resource instance parameters to options.params - params = pe_rsc_params(rsc, NULL, data_set); + params = pe_rsc_params(rsc, NULL, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) &key, @@ -498,7 +500,7 @@ generate_params(void) // Add resource meta-attributes to options.params meta = pcmk__strkey_table(free, free); - get_meta_attributes(meta, rsc, NULL, data_set); + get_meta_attributes(meta, rsc, NULL, scheduler); g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { @@ -509,7 +511,7 @@ generate_params(void) } g_hash_table_destroy(meta); - pe_free_working_set(data_set); + pe_free_working_set(scheduler); return rc; } @@ -587,7 +589,7 @@ main(int argc, char **argv) goto done; } options.api_call = "exec"; - options.action = "monitor"; + options.action = PCMK_ACTION_MONITOR; options.exec_call_opts = lrmd_opt_notify_orig_only; } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index fa2761e..cf4503a 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -213,7 +213,7 @@ log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) int log_level 
= LOG_INFO; GString *str = g_string_sized_new(100); // reasonable starting size - if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } @@ -253,7 +253,7 @@ log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; - if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } @@ -264,9 +264,9 @@ log_execute(lrmd_cmd_t * cmd) static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { - if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && + if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { - return "status"; + return PCMK_ACTION_STATUS; } return action; } @@ -517,7 +517,7 @@ schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ - if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } @@ -844,7 +844,8 @@ action_complete(svc_action_t * action) if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { if (pcmk__result_ok(&(cmd->result)) - && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { + && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, + PCMK_ACTION_STOP, NULL)) { /* systemd returns from start and stop actions after the action * begins, not after it completes. 
We have to jump through a few * hoops so that we don't report 'complete' to the rest of pacemaker @@ -852,7 +853,7 @@ action_complete(svc_action_t * action) */ goagain = true; cmd->real_action = cmd->action; - cmd->action = strdup("monitor"); + cmd->action = strdup(PCMK_ACTION_MONITOR); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop completed @@ -860,7 +861,8 @@ action_complete(svc_action_t * action) goagain = true; } else if (pcmk__result_ok(&(cmd->result)) - && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { + && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, + pcmk__str_casei)) { goagain = true; } else { @@ -878,9 +880,11 @@ action_complete(svc_action_t * action) if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { - if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START, + pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; - } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, + pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } @@ -891,12 +895,12 @@ action_complete(svc_action_t * action) #if SUPPORT_NAGIOS if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { - if (action_matches(cmd, "monitor", 0) + if (action_matches(cmd, PCMK_ACTION_MONITOR, 0) && pcmk__result_ok(&(cmd->result))) { /* Successfully executed --version for the nagios plugin */ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; - } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei) && !pcmk__result_ok(&(cmd->result))) { #ifdef PCMK__TIME_USE_CGT goagain = true; @@ -1007,11 +1011,11 @@ stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, /* This should be possible only for probes in practice, 
but * interpret for all actions to be safe. */ - if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS, + if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_none)) { exit_status = PCMK_OCF_NOT_RUNNING; - } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP, + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { exit_status = PCMK_OCF_OK; @@ -1035,11 +1039,12 @@ stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, // Certain successful actions change the known state of the resource if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { - if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); // "running" - } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, + pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running" } @@ -1235,7 +1240,7 @@ execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) stonith_t *stonith_api = get_stonith_connection(); - if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) + if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei) && (cmd->interval_ms == 0)) { // Probes don't require a fencer connection stonith_action_complete(cmd, rsc->fence_probe_result.exit_status, @@ -1249,16 +1254,17 @@ execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) "No connection to fencer"); return; - } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == pcmk_ok) { do_monitor = TRUE; } - } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); - } 
else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { + } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { do_monitor = TRUE; } else { @@ -1297,7 +1303,7 @@ execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) - && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { + && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; cmd_finalize(cmd, rsc); @@ -1474,6 +1480,7 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, int rc = pcmk_ok; time_t now = time(NULL); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); + const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", @@ -1503,6 +1510,10 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time); + if (start_state) { + crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state); + } + return rc; } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 83a8cd7..e7e30eb 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -493,26 +493,28 @@ main(int argc, char **argv, char **envp) pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK__VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", 
pcmk__str_none)) { - setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ + + pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK__VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { - setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ + pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { - setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */ + pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { - setenv("PCMK_remote_port", options.port, 1); + pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c index 4f914eb..08271bf 100644 --- a/daemons/execd/remoted_pidone.c +++ b/daemons/execd/remoted_pidone.c @@ -1,5 +1,5 @@ /* - * Copyright 2017-2020 the Pacemaker project contributors + * Copyright 2017-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -203,10 +203,14 @@ remoted_spawn_pidone(int argc, char **argv, char **envp) * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or * containers with a custom PID 1 script that launches pacemaker-remoted. */ - const char *pid1 = (getpid() == 1)? 
"full" : getenv("PCMK_remote_pid1"); + const char *pid1 = "default"; - if (pid1 == NULL) { - return; + if (getpid() != 1) { + pid1 = pcmk__env_option(PCMK__ENV_REMOTE_PID1); + if (!pcmk__str_any_of(pid1, "full", "vars", NULL)) { + // Default, unset, or invalid + return; + } } /* When a container is launched, it may be given specific environment @@ -217,7 +221,7 @@ remoted_spawn_pidone(int argc, char **argv, char **envp) */ load_env_vars("/etc/pacemaker/pcmk-init.env"); - if (strcmp(pid1, "full")) { + if (strcmp(pid1, "vars") == 0) { return; } @@ -226,7 +230,7 @@ remoted_spawn_pidone(int argc, char **argv, char **envp) * explicitly configured in the container's environment. */ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { - pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log"); + pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log", true); } sigfillset(&set); @@ -242,7 +246,7 @@ remoted_spawn_pidone(int argc, char **argv, char **envp) // Child remains as pacemaker-remoted return; case -1: - perror("fork"); + crm_err("fork failed: %s", pcmk_rc_str(errno)); } /* Parent becomes the reaper of zombie processes */ diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c index c65e3f3..23a2dcf 100644 --- a/daemons/execd/remoted_tls.c +++ b/daemons/execd/remoted_tls.c @@ -273,39 +273,44 @@ bind_and_listen(struct addrinfo *addr) fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); if (fd < 0) { - crm_perror(LOG_ERR, "Listener socket creation failed"); - return -1; + rc = errno; + crm_err("Listener socket creation failed: %s", pcmk_rc_str(rc)); + return -rc; } /* reuse address */ optval = 1; rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { - crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer); + rc = errno; + crm_err("Local address reuse not allowed on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); - return -1; + return -rc; } if (addr->ai_family == AF_INET6) { optval 
= 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); if (rc < 0) { - crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer); + rc = errno; + crm_err("Couldn't disable IPV6-only on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); - return -1; + return -rc; } } if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { - crm_perror(LOG_ERR, "Cannot bind to %s", buffer); + rc = errno; + crm_err("Cannot bind to %s: %s", buffer, pcmk_rc_str(rc)); close(fd); - return -1; + return -rc; } if (listen(fd, 10) == -1) { - crm_perror(LOG_ERR, "Cannot listen on %s", buffer); + rc = errno; + crm_err("Cannot listen on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); - return -1; + return -rc; } return fd; } @@ -325,12 +330,15 @@ get_address_info(const char *bind_name, int port, struct addrinfo **res) snprintf(port_str, sizeof(port_str), "%d", port); rc = getaddrinfo(bind_name, port_str, &hints, res); - if (rc) { + rc = pcmk__gaierror2rc(rc); + + if (rc != pcmk_rc_ok) { crm_err("Unable to get IP address(es) for %s: %s", - (bind_name? bind_name : "local node"), gai_strerror(rc)); - return -EADDRNOTAVAIL; + (bind_name? 
bind_name : "local node"), pcmk_rc_str(rc)); + return rc; } - return pcmk_ok; + + return pcmk_rc_ok; } int @@ -340,7 +348,7 @@ lrmd_init_remote_tls_server(void) int port = crm_default_remote_port(); struct addrinfo *res = NULL, *iter; gnutls_datum_t psk_key = { NULL, 0 }; - const char *bind_name = getenv("PCMK_remote_address"); + const char *bind_name = pcmk__env_option(PCMK__ENV_REMOTE_ADDRESS); static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, @@ -371,7 +379,7 @@ lrmd_init_remote_tls_server(void) } gnutls_free(psk_key.data); - if (get_address_info(bind_name, port, &res) != pcmk_ok) { + if (get_address_info(bind_name, port, &res) != pcmk_rc_ok) { return -1; } @@ -391,7 +399,7 @@ lrmd_init_remote_tls_server(void) if (iter->ai_family == filter) { ssock = bind_and_listen(iter); } - if (ssock != -1) { + if (ssock >= 0) { break; } diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am index 2ca0088..62aa864 100644 --- a/daemons/fenced/Makefile.am +++ b/daemons/fenced/Makefile.am @@ -14,7 +14,8 @@ include $(top_srcdir)/mk/man.mk halibdir = $(CRM_DAEMON_DIR) -halib_PROGRAMS = pacemaker-fenced cts-fence-helper +halib_PROGRAMS = pacemaker-fenced \ + cts-fence-helper noinst_HEADERS = pacemaker-fenced.h @@ -23,30 +24,36 @@ man7_MANS = pacemaker-fenced.7 endif cts_fence_helper_SOURCES = cts-fence-helper.c -cts_fence_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/fencing/libstonithd.la +cts_fence_helper_LDADD = $(top_builddir)/lib/fencing/libstonithd.la +cts_fence_helper_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_fenced_YFLAGS = -d pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - 
$(top_builddir)/lib/pengine/libpe_status.la \ - $(top_builddir)/lib/pacemaker/libpacemaker.la \ - $(CLUSTERLIBS) -pacemaker_fenced_SOURCES = pacemaker-fenced.c \ - fenced_commands.c \ - fenced_remote.c \ + +pacemaker_fenced_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/pengine/libpe_status.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/cib/libcib.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_fenced_LDADD += $(CLUSTERLIBS) + +pacemaker_fenced_SOURCES = pacemaker-fenced.c \ + fenced_cib.c \ + fenced_commands.c \ + fenced_remote.c \ + fenced_scheduler.c \ fenced_history.c CLEANFILES = $(man7_MANS) $(man8_MANS) if BUILD_LEGACY_LINKS +.PHONY: install-exec-hook install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd +.PHONY: uninstall-hook uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd endif diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c index e18a1f4..07bd500 100644 --- a/daemons/fenced/cts-fence-helper.c +++ b/daemons/fenced/cts-fence-helper.c @@ -212,10 +212,12 @@ run_fence_failure_test(void) cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for failure test", 1, 0); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, + 3, 0), "Fence failure results off", 1, -ENODATA); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", + PCMK_ACTION_REBOOT, 3, 0), "Fence failure results reboot", 1, -ENODATA); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), @@ -246,11 +248,13 @@ 
run_fence_failure_rollover_test(void) cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), "Register device2 for rollover test", 1, 0); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, + 3, 0), "Fence rollover results off", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON, 3, + 0), "Fence rollover results on", 1, -ENODEV); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), @@ -278,7 +282,8 @@ run_standard_test(void) stonith_key_value_freeall(params, 1, 1); params = NULL; - single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0); + single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), + PCMK_ACTION_LIST, 1, 0); single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0); @@ -288,14 +293,17 @@ run_standard_test(void) single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), "Status false_1_node1", 1, 0); - single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), + single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, + 1, 0), "Fence unknown-host (expected failure)", 0, -ENODEV); - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, + 1, 0), "Fence false_1_node1", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 1, + 0), "Unfence false_1_node1", 1, -ENODEV); /* Confirm that an invalid level index is rejected */ @@ -362,31 +370,31 @@ standard_dev_test(void) rc = 
st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0); + rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0); + rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0); crm_debug("Fence alias: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); crm_debug("Status alias: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); @@ -426,7 +434,8 @@ test_async_fence_pass(int check_event) return; } - rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); @@ -459,7 +468,8 @@ test_async_fence_custom_timeout(int check_event) } begin = time(NULL); - rc = st->cmds->fence(st, 0, "custom_timeout_node1", 
"off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); @@ -479,7 +489,8 @@ test_async_fence_timeout(int check_event) return; } - rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c new file mode 100644 index 0000000..e11bf68 --- /dev/null +++ b/daemons/fenced/fenced_cib.c @@ -0,0 +1,734 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +*/ + +#include <crm_internal.h> + +#include <stdbool.h> +#include <stdio.h> +#include <libxml/tree.h> +#include <libxml/xpath.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <crm/cluster/internal.h> + +#include <crm/cib.h> +#include <crm/cib/internal.h> + +#include <pacemaker-fenced.h> + +static xmlNode *local_cib = NULL; +static cib_t *cib_api = NULL; +static bool have_cib_devices = FALSE; + +/*! 
+ * \internal + * \brief Check whether a node has a specific attribute name/value + * + * \param[in] node Name of node to check + * \param[in] name Name of an attribute to look for + * \param[in] value The value the named attribute needs to be set to in order to be considered a match + * + * \return TRUE if the locally cached CIB has the specified node attribute + */ +gboolean +node_has_attr(const char *node, const char *name, const char *value) +{ + GString *xpath = NULL; + xmlNode *match; + + CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) + && (value != NULL), return FALSE); + + /* Search for the node's attributes in the CIB. While the schema allows + * multiple sets of instance attributes, and allows instance attributes to + * use id-ref to reference values elsewhere, that is intended for resources, + * so we ignore that here. + */ + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS + "/" XML_CIB_TAG_NVPAIR + "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " + "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); + + match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); + + g_string_free(xpath, TRUE); + return (match != NULL); +} + +static void +add_topology_level(xmlNode *match) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + CRM_CHECK(match != NULL, return); + + fenced_register_level(match, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); + pcmk__reset_result(&result); + free(desc); +} + +static void +topology_remove_helper(const char *node, int level) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); + + crm_xml_add(data, F_STONITH_ORIGIN, __func__); + crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); + crm_xml_add(data, XML_ATTR_STONITH_TARGET, 
node); + + fenced_unregister_level(data, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); + pcmk__reset_result(&result); + free_xml(data); + free(desc); +} + +static void +remove_topology_level(xmlNode *match) +{ + int index = 0; + char *key = NULL; + + CRM_CHECK(match != NULL, return); + + key = stonith_level_key(match, fenced_target_by_unknown); + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + topology_remove_helper(key, index); + free(key); +} + +static void +register_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + remove_topology_level(match); + add_topology_level(match); + } +} + +/* Fencing +<diff crm_feature_set="3.0.6"> + <diff-removed> + <fencing-topology> + <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/> + <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/> + <fencing-level devices="disk,network" id="f-p2.1"/> + </fencing-topology> + </diff-removed> + <diff-added> + <fencing-topology> + <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/> + <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/> + <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/> + </fencing-topology> + </diff-added> +</diff> +*/ + +void +fencing_topology_init(void) +{ + xmlXPathObjectPtr xpathObj = NULL; + const char *xpath = "//" XML_TAG_FENCING_LEVEL; + + crm_trace("Full topology refresh"); + free_topology_list(); + init_topology_list(); + + /* Grab everything */ + xpathObj = xpath_search(local_cib, xpath); + register_fencing_topology(xpathObj); + + freeXpathObject(xpathObj); +} + +static void +remove_cib_device(xmlXPathObjectPtr xpathObj) +{ + int 
max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match != NULL) { + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + } + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + rsc_id = crm_element_value(match, XML_ATTR_ID); + + stonith_device_remove(rsc_id, true); + } +} + +static void +update_stonith_watchdog_timeout_ms(xmlNode *cib) +{ + long timeout_ms = 0; + xmlNode *stonith_watchdog_xml = NULL; + const char *value = NULL; + + stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", + cib, LOG_NEVER); + if (stonith_watchdog_xml) { + value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); + } + if (value) { + timeout_ms = crm_get_msec(value); + } + + if (timeout_ms < 0) { + timeout_ms = pcmk__auto_watchdog_timeout(); + } + + stonith_watchdog_timeout_ms = timeout_ms; +} + +/*! 
+ * \internal + * \brief Update all STONITH device definitions based on current CIB + */ +static void +cib_devices_update(void) +{ + GHashTableIter iter; + stonith_device_t *device = NULL; + + crm_info("Updating devices to version %s.%s.%s", + crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(local_cib, XML_ATTR_GENERATION), + crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->cib_registered) { + device->dirty = TRUE; + } + } + + /* have list repopulated if cib has a watchdog-fencing-resource + TODO: keep a cached list for queries happening while we are refreshing + */ + g_list_free_full(stonith_watchdog_targets, free); + stonith_watchdog_targets = NULL; + + fenced_scheduler_run(local_cib); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->dirty) { + g_hash_table_iter_remove(&iter); + } + } +} + +static void +update_cib_stonith_devices_v1(const char *event, xmlNode * msg) +{ + const char *reason = "none"; + gboolean needs_update = FALSE; + xmlXPathObjectPtr xpath_obj = NULL; + + /* process new constraints */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + /* Safest and simplest to always recompute */ + needs_update = TRUE; + reason = "new location constraint"; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpath_obj, lpc); + + crm_log_xml_trace(match, "new constraint"); + } + } + freeXpathObject(xpath_obj); + + /* process deletions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + remove_cib_device(xpath_obj); + } + freeXpathObject(xpath_obj); + + /* process additions */ + 
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpath_obj, lpc); + + rsc_id = crm_element_value(match, XML_ATTR_ID); + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + crm_trace("Fencing resource %s was added or modified", rsc_id); + reason = "new resource"; + needs_update = TRUE; + } + } + freeXpathObject(xpath_obj); + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } +} + +static void +update_cib_stonith_devices_v2(const char *event, xmlNode * msg) +{ + xmlNode *change = NULL; + char *reason = NULL; + bool needs_update = FALSE; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + const char *shortpath = NULL; + + if ((op == NULL) || + (strcmp(op, "move") == 0) || + strstr(xpath, "/"XML_CIB_TAG_STATUS)) { + continue; + } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { + const char *rsc_id = NULL; + char *search = NULL; + char *mutable = NULL; + + if (strstr(xpath, XML_TAG_ATTR_SETS) || + strstr(xpath, XML_TAG_META_SETS)) { + needs_update = TRUE; + pcmk__str_update(&reason, + "(meta) attribute deleted from resource"); + break; + } + pcmk__str_update(&mutable, xpath); + rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); + if (rsc_id != NULL) { + rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); + search = strchr(rsc_id, '\''); + } + 
if (search != NULL) { + *search = 0; + stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { + crm_warn("Ignoring malformed CIB update (resource deletion)"); + } + free(mutable); + + } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || + strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || + strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { + shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); + reason = crm_strdup_printf("%s %s", op, shortpath+1); + needs_update = TRUE; + break; + } + } + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } else { + crm_trace("No updates for device list found in CIB"); + } + free(reason); +} + +static void +update_cib_stonith_devices(const char *event, xmlNode * msg) +{ + int format = 1; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + switch(format) { + case 1: + update_cib_stonith_devices_v1(event, msg); + break; + case 2: + update_cib_stonith_devices_v2(event, msg); + break; + default: + crm_warn("Unknown patch format: %d", format); + } +} + +static void +watchdog_device_update(void) +{ + if (stonith_watchdog_timeout_ms > 0) { + if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && + !stonith_watchdog_targets) { + /* getting here watchdog-fencing enabled, no device there yet + and reason isn't stonith_watchdog_targets preventing that + */ + int rc; + xmlNode *xml; + + xml = create_device_registration_xml( + STONITH_WATCHDOG_ID, + st_namespace_internal, + STONITH_WATCHDOG_AGENT, + NULL, /* stonith_device_register will add our + own name as PCMK_STONITH_HOST_LIST param + so we can skip that here + */ + NULL); + rc = stonith_device_register(xml, TRUE); + free_xml(xml); + if (rc != pcmk_ok) { + rc = pcmk_legacy2rc(rc); + exit_code = CRM_EX_FATAL; + crm_crit("Cannot register watchdog pseudo 
fence agent: %s", + pcmk_rc_str(rc)); + stonith_shutdown(0); + } + } + + } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { + /* be silent if no device - todo parameter to stonith_device_remove */ + stonith_device_remove(STONITH_WATCHDOG_ID, true); + } +} + +/*! + * \internal + * \brief Query the full CIB + * + * \return Standard Pacemaker return code + */ +static int +fenced_query_cib(void) +{ + int rc = pcmk_ok; + + crm_trace("Re-requesting full CIB"); + rc = cib_api->cmds->query(cib_api, NULL, &local_cib, + cib_scope_local|cib_sync_call); + rc = pcmk_legacy2rc(rc); + if (rc == pcmk_rc_ok) { + CRM_ASSERT(local_cib != NULL); + } else { + crm_err("Couldn't retrieve the CIB: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + return rc; +} + +static void +remove_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if (match && crm_element_value(match, XML_DIFF_MARKER)) { + /* Deletion */ + int index = 0; + char *target = stonith_level_key(match, fenced_target_by_unknown); + + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + if (target == NULL) { + crm_err("Invalid fencing target in element %s", ID(match)); + + } else if (index <= 0) { + crm_err("Invalid level for %s in element %s", target, ID(match)); + + } else { + topology_remove_helper(target, index); + } + /* } else { Deal with modifications during the 'addition' stage */ + } + } +} + +static void +update_fencing_topology(const char *event, xmlNode * msg) +{ + int format = 1; + const char *xpath; + xmlXPathObjectPtr xpathObj = NULL; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + + if(format == 1) { + /* Process deletions (only) */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" 
XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + remove_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + /* Process additions and changes */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + register_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + } else if(format == 2) { + xmlNode *change = NULL; + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + + xml_patch_versions(patchset, add, del); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + if(op == NULL) { + continue; + + } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { + /* Change to a specific entry */ + + crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); + if(strcmp(op, "move") == 0) { + continue; + + } else if(strcmp(op, "create") == 0) { + add_topology_level(change->children); + + } else if(strcmp(op, "modify") == 0) { + xmlNode *match = first_named_child(change, XML_DIFF_RESULT); + + if(match) { + remove_topology_level(match->children); + add_topology_level(match->children); + } + + } else if(strcmp(op, "delete") == 0) { + /* Nuclear option, all we have is the path and an id... 
not enough to remove a specific entry */ + crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { + /* Change to the topology in general */ + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + + } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { + /* Changes to the whole config section, possibly including the topology as a whild */ + if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + + } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + } + } + + } else { + crm_warn("Unknown patch format: %d", format); + } +} + +static void +update_cib_cache_cb(const char *event, xmlNode * msg) +{ + long timeout_ms_saved = stonith_watchdog_timeout_ms; + bool need_full_refresh = false; + + if(!have_cib_devices) { + crm_trace("Skipping updates until we get a full dump"); + return; + + } else if(msg == NULL) { + crm_trace("Missing %s update", event); + return; + } + + /* Maintain a local copy of the CIB so that we have full access + * to device definitions, location constraints, and node attributes + */ + if (local_cib != NULL) { + int rc = pcmk_ok; + xmlNode *patchset = NULL; + + crm_element_value_int(msg, F_CIB_RC, &rc); + if (rc != pcmk_ok) { + return; + } + + patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + rc = xml_apply_patchset(local_cib, patchset, TRUE); + switch 
(rc) { + case pcmk_ok: + case -pcmk_err_old_data: + break; + case -pcmk_err_diff_resync: + case -pcmk_err_diff_failed: + crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + break; + default: + crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + } + } + + if (local_cib == NULL) { + if (fenced_query_cib() != pcmk_rc_ok) { + return; + } + need_full_refresh = true; + } + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + if (timeout_ms_saved != stonith_watchdog_timeout_ms) { + need_full_refresh = true; + } + + if (need_full_refresh) { + fencing_topology_init(); + cib_devices_update(); + } else { + // Partial refresh + update_fencing_topology(event, msg); + update_cib_stonith_devices(event, msg); + } + + watchdog_device_update(); +} + +static void +init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + crm_info("Updating device list from CIB"); + have_cib_devices = TRUE; + local_cib = copy_xml(output); + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + fencing_topology_init(); + cib_devices_update(); + watchdog_device_update(); +} + +static void +cib_connection_destroy(gpointer user_data) +{ + if (stonith_shutdown_flag) { + crm_info("Connection to the CIB manager closed"); + return; + } else { + crm_crit("Lost connection to the CIB manager, shutting down"); + } + if (cib_api) { + cib_api->cmds->signoff(cib_api); + } + stonith_shutdown(0); +} + +/*! 
+ * \internal + * \brief Disconnect from CIB manager + */ +void +fenced_cib_cleanup(void) +{ + if (cib_api != NULL) { + cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, + update_cib_cache_cb); + cib__clean_up_connection(&cib_api); + } + free_xml(local_cib); + local_cib = NULL; +} + +void +setup_cib(void) +{ + int rc, retries = 0; + + cib_api = cib_new(); + if (cib_api == NULL) { + crm_err("No connection to the CIB manager"); + return; + } + + do { + sleep(retries); + rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); + } while (rc == -ENOTCONN && ++retries < 5); + + if (rc != pcmk_ok) { + crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); + + } else if (pcmk_ok != + cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { + crm_err("Could not set CIB notification callback"); + + } else { + rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); + cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", + init_cib_cache_cb); + cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); + crm_info("Watching for fencing topology changes"); + } +} diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index ba63cf8..7a62ed6 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -68,8 +68,6 @@ struct device_search_s { static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); -static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, - pcmk__client_t *client); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); @@ -124,7 +122,7 @@ static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing - && 
pcmk__str_eq(action, "on", pcmk__str_none); + && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int @@ -223,11 +221,11 @@ get_action_timeout(const stonith_device_t *device, const char *action, /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ - if (pcmk__str_eq(action, "reboot", pcmk__str_none) + if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); - action = "off"; + action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ @@ -277,7 +275,7 @@ fenced_device_reboot_action(const char *device_id) action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } - return pcmk__s(action, "reboot"); + return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! @@ -554,7 +552,7 @@ stonith_device_execute(stonith_device_t * device) #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { - if (pcmk__str_eq(cmd->action, "stop", pcmk__str_none)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); @@ -570,14 +568,14 @@ stonith_device_execute(stonith_device_t * device) #endif action_str = cmd->action; - if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_none) + if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? 
"" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); - action_str = "off"; + action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { @@ -691,7 +689,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) delay_base = delay_max; } if (delay_max > 0) { - // coverity[dont_call] We're not using rand() for security + // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; @@ -948,16 +946,16 @@ read_action_metadata(stonith_device_t *device) action = crm_element_value(match, "name"); - if (pcmk__str_eq(action, "list", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); - } else if (pcmk__str_eq(action, "status", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); - } else if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); - } else if (pcmk__str_eq(action, "on", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* "automatic" means the cluster will unfence node when it joins */ /* "required" is a deprecated synonym for "automatic" */ if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { @@ -1024,16 +1022,16 @@ xml2device_params(const char *name, const xmlNode *dev) if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); - } else if (strcmp(value, "reboot") == 0) { + } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster 
property instead)", STONITH_ATTR_ACTION_OP); - } else if (strcmp(value, "off") == 0) { - map_action(params, "reboot", value); + } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { + map_action(params, PCMK_ACTION_REBOOT, value); } else { - map_action(params, "off", value); - map_action(params, "reboot", value); + map_action(params, PCMK_ACTION_OFF, value); + map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); @@ -1132,7 +1130,7 @@ build_device_from_xml(xmlNode *dev) device->automatic_unfencing = TRUE; } - if (is_action_required("on", device)) { + if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } @@ -1672,8 +1670,7 @@ unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, * search by xpath, because it might give multiple hits if the XML is the * CIB. */ - if ((xml != NULL) - && !pcmk__str_eq(TYPE(xml), XML_TAG_FENCING_LEVEL, pcmk__str_none)) { + if ((xml != NULL) && !pcmk__xe_is(xml, XML_TAG_FENCING_LEVEL)) { xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING); } @@ -1972,7 +1969,7 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) "Watchdog fence device not configured"); return; - } else if (pcmk__str_eq(action, "list", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, @@ -1980,7 +1977,7 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) NULL); return; - } else if (pcmk__str_eq(action, "monitor", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } @@ -1994,7 +1991,8 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) "'%s' not found", id); return; - } else if (!device->api_registered 
&& !strcmp(action, "monitor")) { + } else if (!device->api_registered + && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); @@ -2104,14 +2102,14 @@ localhost_is_eligible_with_remap(const stonith_device_t *device, // Check potential remaps - if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ - if (localhost_is_eligible(device, "off", target, allow_self) - || localhost_is_eligible(device, "on", target, FALSE)) { + if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) + || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } @@ -2146,7 +2144,7 @@ can_fence_host_with_device(stonith_device_t *dev, /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ - if (pcmk__str_eq(action, "on", pcmk__str_none) + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; @@ -2175,7 +2173,8 @@ can_fence_host_with_device(stonith_device_t *dev, time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { - int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout); + int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, + search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may 
occur", @@ -2185,7 +2184,7 @@ can_fence_host_with_device(stonith_device_t *dev, crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); - schedule_internal_command(__func__, dev, "list", NULL, + schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ @@ -2207,7 +2206,7 @@ can_fence_host_with_device(stonith_device_t *dev, crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); - schedule_internal_command(__func__, dev, "status", target, + schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; @@ -2384,6 +2383,30 @@ add_action_reply(xmlNode *xml, const char *action, add_disallowed(child, action, device, target, allow_suicide); } +/*! + * \internal + * \brief Send a reply to a CPG peer or IPC client + * + * \param[in] reply XML reply to send + * \param[in] call_options Send synchronously if st_opt_sync_call is set + * \param[in] remote_peer If not NULL, name of peer node to send CPG reply + * \param[in,out] client If not NULL, client to send IPC reply + */ +static void +stonith_send_reply(const xmlNode *reply, int call_options, + const char *remote_peer, pcmk__client_t *client) +{ + CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), + return); + + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { + send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, + reply, FALSE); + } +} + static void stonith_query_capable_device_cb(GList * devices, void *user_data) { @@ -2429,15 +2452,16 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) * capable device that doesn't support "reboot", remap to "off" instead. 
*/ if (!pcmk_is_set(device->flags, st_device_supports_reboot) - && pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, + pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); - action = "off"; + action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); - if (pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. @@ -2451,9 +2475,9 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); - add_action_reply(dev, "off", device, query->target, + add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); - add_action_reply(dev, "on", device, query->target, FALSE); + add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ @@ -2765,8 +2789,10 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) /* The device is ready to do something else now */ if (device) { - if (!device->verified && pcmk__result_ok(result) && - (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { + if (!device->verified && pcmk__result_ok(result) + && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, + PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, + NULL)) { device->verified = TRUE; } @@ -3052,30 +3078,6 @@ check_alternate_host(const char *target) return NULL; } -/*! 
- * \internal - * \brief Send a reply to a CPG peer or IPC client - * - * \param[in] reply XML reply to send - * \param[in] call_options Send synchronously if st_opt_sync_call is set - * \param[in] remote_peer If not NULL, name of peer node to send CPG reply - * \param[in,out] client If not NULL, client to send IPC reply - */ -static void -stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, - pcmk__client_t *client) -{ - CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), - return); - - if (remote_peer == NULL) { - do_local_reply(reply, client, call_options); - } else { - send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, - reply, FALSE); - } -} - static void remove_relay_op(xmlNode * request) { diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index dc67947..843b3d4 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -292,7 +292,7 @@ init_stonith_remote_op_hash_table(GHashTable **table) static const char * op_requested_action(const remote_fencing_op_t *op) { - return ((op->phase > st_phase_requested)? "reboot" : op->action); + return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! @@ -311,7 +311,7 @@ op_phase_off(remote_fencing_op_t *op) /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ - strcpy(op->action, "off"); + strcpy(op->action, PCMK_ACTION_OFF); } /*! @@ -329,7 +329,7 @@ op_phase_on(remote_fencing_op_t *op) "remapping to 'on' for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; - strcpy(op->action, "on"); + strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. 
@@ -362,7 +362,7 @@ undo_op_remap(remote_fencing_op_t *op) crm_info("Undoing remap of reboot targeting %s for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; - strcpy(op->action, "reboot"); + strcpy(op->action, PCMK_ACTION_REBOOT); } } @@ -673,8 +673,8 @@ remote_op_timeout_one(gpointer userdata) "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); @@ -961,12 +961,12 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok) set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level - if (op->level > 1 && op->delay > 0) { - op->delay = 0; + if ((op->level > 1) && (op->client_delay > 0)) { + op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) - && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) { + && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. 
(Think about @@ -1163,7 +1163,7 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays - crm_element_value_int(request, F_STONITH_DELAY, &(op->delay)); + crm_element_value_int(request, F_STONITH_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); @@ -1474,8 +1474,8 @@ get_device_timeout(const remote_fencing_op_t *op, return op->base_timeout; } - // op->delay < 0 means disable any static/random fencing delays - if (with_delay && op->delay >= 0) { + // op->client_delay < 0 means disable any static/random fencing delays + if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max delay = (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); @@ -1541,7 +1541,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, GList *iter = NULL; GList *auto_list = NULL; - if (pcmk__str_eq(op->action, "on", pcmk__str_none) + if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } @@ -1620,7 +1620,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, * up the total timeout. */ return ((total_timeout ? total_timeout : op->base_timeout) - + (op->delay > 0 ? op->delay : 0)); + + ((op->client_delay > 0)? 
op->client_delay : 0)); } static void @@ -1695,7 +1695,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) - && pcmk__str_eq(op->action, "on", pcmk__str_none)) { + && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); @@ -1724,8 +1724,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, op->target, op->client_name, op->originator); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; } request_peer_fencing(op, NULL); @@ -1794,7 +1794,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) * node back on when we should. */ device = op->devices->data; - if (pcmk__str_eq(fenced_device_reboot_action(device), "off", + if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", @@ -1844,13 +1844,16 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) } if (peer) { - /* Take any requested fencing delay into account to prevent it from eating - * up the timeout. - */ - int timeout_one = (op->delay > 0 ? - TIMEOUT_MULTIPLY_FACTOR * op->delay : 0); + int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); + if (op->client_delay > 0) { + /* Take requested fencing delay into account to prevent it from + * eating up the timeout. 
+ */ + timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; + } + crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); @@ -1859,7 +1862,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); - crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay); + crm_xml_add_int(remote_op, F_STONITH_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * @@ -2097,7 +2100,7 @@ parse_action_specific(const xmlNode *xml, const char *peer, const char *device, } /* Handle devices with automatic unfencing */ - if (pcmk__str_eq(action, "on", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); @@ -2160,11 +2163,11 @@ add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. 
*/ - if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) { - parse_action_specific(child, peer->host, device, "off", + if (pcmk__str_eq(ID(child), PCMK_ACTION_OFF, pcmk__str_none)) { + parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); - } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) { - parse_action_specific(child, peer->host, device, "on", + } else if (pcmk__str_eq(ID(child), PCMK_ACTION_ON, pcmk__str_none)) { + parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } diff --git a/daemons/fenced/fenced_scheduler.c b/daemons/fenced/fenced_scheduler.c new file mode 100644 index 0000000..27d990f --- /dev/null +++ b/daemons/fenced/fenced_scheduler.c @@ -0,0 +1,225 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +*/ + +#include <crm_internal.h> + +#include <stdio.h> +#include <errno.h> +#include <glib.h> + +#include <crm/pengine/status.h> +#include <crm/pengine/internal.h> + +#include <pacemaker-internal.h> +#include <pacemaker-fenced.h> + +static pcmk_scheduler_t *scheduler = NULL; + +/*! + * \internal + * \brief Initialize scheduler data for fencer purposes + * + * \return Standard Pacemaker return code + */ +int +fenced_scheduler_init(void) +{ + pcmk__output_t *logger = NULL; + int rc = pcmk__log_output_new(&logger); + + if (rc != pcmk_rc_ok) { + return rc; + } + + scheduler = pe_new_working_set(); + if (scheduler == NULL) { + pcmk__output_free(logger); + return ENOMEM; + } + + pe__register_messages(logger); + pcmk__register_lib_messages(logger); + pcmk__output_set_log_level(logger, LOG_TRACE); + scheduler->priv = logger; + + return pcmk_rc_ok; +} + +/*! 
+ * \internal + * \brief Free all scheduler-related resources + */ +void +fenced_scheduler_cleanup(void) +{ + if (scheduler != NULL) { + pcmk__output_t *logger = scheduler->priv; + + if (logger != NULL) { + logger->finish(logger, CRM_EX_OK, true, NULL); + pcmk__output_free(logger); + scheduler->priv = NULL; + } + pe_free_working_set(scheduler); + scheduler = NULL; + } +} + +/*! + * \internal + * \brief Check whether the local node is in a resource's allowed node list + * + * \param[in] rsc Resource to check + * + * \return Pointer to node if found, otherwise NULL + */ +static pcmk_node_t * +local_node_allowed_for(const pcmk_resource_t *rsc) +{ + if ((rsc != NULL) && (stonith_our_uname != NULL)) { + GHashTableIter iter; + pcmk_node_t *node = NULL; + + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (pcmk__str_eq(node->details->uname, stonith_our_uname, + pcmk__str_casei)) { + return node; + } + } + } + return NULL; +} + +/*! 
+ * \internal + * \brief If a given resource or any of its children are fencing devices, + * register the devices + * + * \param[in,out] data Resource to check + * \param[in,out] user_data Ignored + */ +static void +register_if_fencing_device(gpointer data, gpointer user_data) +{ + pcmk_resource_t *rsc = data; + + xmlNode *xml = NULL; + GHashTableIter hash_iter; + pcmk_node_t *node = NULL; + const char *name = NULL; + const char *value = NULL; + const char *rclass = NULL; + const char *agent = NULL; + const char *rsc_provides = NULL; + stonith_key_value_t *params = NULL; + + // If this is a collective resource, check children instead + if (rsc->children != NULL) { + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + register_if_fencing_device(iter->data, NULL); + if (pe_rsc_is_clone(rsc)) { + return; // Only one instance needs to be checked for clones + } + } + return; + } + + rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + return; // Not a fencing device + } + + if (pe__resource_is_disabled(rsc)) { + crm_info("Ignoring fencing device %s because it is disabled", rsc->id); + return; + } + + if ((stonith_watchdog_timeout_ms <= 0) && + pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { + crm_info("Ignoring fencing device %s " + "because watchdog fencing is disabled", rsc->id); + return; + } + + // Check whether local node is allowed to run resource + node = local_node_allowed_for(rsc); + if (node == NULL) { + crm_info("Ignoring fencing device %s " + "because local node is not allowed to run it", rsc->id); + return; + } + if (node->weight < 0) { + crm_info("Ignoring fencing device %s " + "because local node has preference %s for it", + rsc->id, pcmk_readable_score(node->weight)); + return; + } + + // If device is in a group, check whether local node is allowed for group + if ((rsc->parent != NULL) + && (rsc->parent->variant == 
pcmk_rsc_variant_group)) { + pcmk_node_t *group_node = local_node_allowed_for(rsc->parent); + + if ((group_node != NULL) && (group_node->weight < 0)) { + crm_info("Ignoring fencing device %s " + "because local node has preference %s for its group", + rsc->id, pcmk_readable_score(group_node->weight)); + return; + } + } + + crm_debug("Reloading configuration of fencing device %s", rsc->id); + + agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); + + get_meta_attributes(rsc->meta, rsc, node, scheduler); + rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); + + g_hash_table_iter_init(&hash_iter, pe_rsc_params(rsc, node, scheduler)); + while (g_hash_table_iter_next(&hash_iter, (gpointer *) &name, + (gpointer *) &value)) { + if ((name == NULL) || (value == NULL)) { + continue; + } + params = stonith_key_value_add(params, name, value); + } + + xml = create_device_registration_xml(pcmk__s(rsc->clone_name, rsc->id), + st_namespace_any, agent, params, + rsc_provides); + stonith_key_value_freeall(params, 1, 1); + CRM_ASSERT(stonith_device_register(xml, TRUE) == pcmk_ok); + free_xml(xml); +} + +/*! 
+ * \internal + * \brief Run the scheduler for fencer purposes + * + * \param[in] cib Cluster's current CIB + */ +void +fenced_scheduler_run(xmlNode *cib) +{ + CRM_CHECK((cib != NULL) && (scheduler != NULL), return); + + if (scheduler->now != NULL) { + crm_time_free(scheduler->now); + scheduler->now = NULL; + } + scheduler->localhost = stonith_our_uname; + pcmk__schedule_actions(cib, pcmk_sched_location_only + |pcmk_sched_no_compat + |pcmk_sched_no_counts, scheduler); + g_list_foreach(scheduler->resources, register_if_fencing_device, NULL); + + scheduler->input = NULL; // Wasn't a copy, so don't let API free it + pe_reset_working_set(scheduler); +} diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 4edda6c..7c69fb8 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -27,7 +27,6 @@ #include <crm/common/ipc.h> #include <crm/common/ipc_internal.h> #include <crm/common/output_internal.h> -#include <crm/cluster/internal.h> #include <crm/stonith-ng.h> #include <crm/fencing/internal.h> @@ -37,8 +36,6 @@ #include <crm/common/mainloop.h> #include <crm/cib/internal.h> -#include <crm/pengine/status.h> -#include <pacemaker-internal.h> #include <pacemaker-fenced.h> @@ -51,18 +48,9 @@ GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; -static gboolean stonith_shutdown_flag = FALSE; +gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; -static xmlNode *local_cib = NULL; -static pe_working_set_t *fenced_data_set = NULL; -static const unsigned long long data_set_flags = pe_flag_quick_location - | pe_flag_no_compat - | pe_flag_no_counts; - -static cib_t *cib_api = NULL; - -static pcmk__output_t *logger_out = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { @@ -77,9 +65,8 @@ static struct { gchar **log_files; } options; -static crm_exit_t exit_code = CRM_EX_OK; +crm_exit_t exit_code = CRM_EX_OK; 
-static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t @@ -241,7 +228,8 @@ stonith_peer_cs_destroy(gpointer user_data) #endif void -do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options) +do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, + int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; @@ -292,7 +280,7 @@ static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { - xmlNode *update_msg = user_data; + const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; @@ -443,589 +431,6 @@ fenced_send_level_notification(const char *op, send_config_notification(op, result, desc, g_hash_table_size(topology)); } -static void -topology_remove_helper(const char *node, int level) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); - - crm_xml_add(data, F_STONITH_ORIGIN, __func__); - crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); - crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); - - fenced_unregister_level(data, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); - pcmk__reset_result(&result); - free_xml(data); - free(desc); -} - -static void -remove_cib_device(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if(match != NULL) { - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - } - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - rsc_id = crm_element_value(match, XML_ATTR_ID); - - stonith_device_remove(rsc_id, true); - } -} - -static void -remove_topology_level(xmlNode *match) -{ - int index = 0; - 
char *key = NULL; - - CRM_CHECK(match != NULL, return); - - key = stonith_level_key(match, fenced_target_by_unknown); - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - topology_remove_helper(key, index); - free(key); -} - -static void -add_topology_level(xmlNode *match) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - - CRM_CHECK(match != NULL, return); - - fenced_register_level(match, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); - pcmk__reset_result(&result); - free(desc); -} - -static void -remove_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if (match && crm_element_value(match, XML_DIFF_MARKER)) { - /* Deletion */ - int index = 0; - char *target = stonith_level_key(match, fenced_target_by_unknown); - - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - if (target == NULL) { - crm_err("Invalid fencing target in element %s", ID(match)); - - } else if (index <= 0) { - crm_err("Invalid level for %s in element %s", target, ID(match)); - - } else { - topology_remove_helper(target, index); - } - /* } else { Deal with modifications during the 'addition' stage */ - } - } -} - -static void -register_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - remove_topology_level(match); - add_topology_level(match); - } -} - -/* Fencing -<diff crm_feature_set="3.0.6"> - <diff-removed> - <fencing-topology> - <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/> - <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/> - <fencing-level devices="disk,network" 
id="f-p2.1"/> - </fencing-topology> - </diff-removed> - <diff-added> - <fencing-topology> - <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/> - <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/> - <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/> - </fencing-topology> - </diff-added> -</diff> -*/ - -static void -fencing_topology_init(void) -{ - xmlXPathObjectPtr xpathObj = NULL; - const char *xpath = "//" XML_TAG_FENCING_LEVEL; - - crm_trace("Full topology refresh"); - free_topology_list(); - init_topology_list(); - - /* Grab everything */ - xpathObj = xpath_search(local_cib, xpath); - register_fencing_topology(xpathObj); - - freeXpathObject(xpathObj); -} - -#define rsc_name(x) x->clone_name?x->clone_name:x->id - -/*! - * \internal - * \brief Check whether our uname is in a resource's allowed node list - * - * \param[in] rsc Resource to check - * - * \return Pointer to node object if found, NULL otherwise - */ -static pe_node_t * -our_node_allowed_for(const pe_resource_t *rsc) -{ - GHashTableIter iter; - pe_node_t *node = NULL; - - if (rsc && stonith_our_uname) { - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { - break; - } - node = NULL; - } - } - return node; -} - -static void -watchdog_device_update(void) -{ - if (stonith_watchdog_timeout_ms > 0) { - if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && - !stonith_watchdog_targets) { - /* getting here watchdog-fencing enabled, no device there yet - and reason isn't stonith_watchdog_targets preventing that - */ - int rc; - xmlNode *xml; - - xml = create_device_registration_xml( - STONITH_WATCHDOG_ID, - st_namespace_internal, - STONITH_WATCHDOG_AGENT, - NULL, /* stonith_device_register will add our - own name as 
PCMK_STONITH_HOST_LIST param - so we can skip that here - */ - NULL); - rc = stonith_device_register(xml, TRUE); - free_xml(xml); - if (rc != pcmk_ok) { - rc = pcmk_legacy2rc(rc); - exit_code = CRM_EX_FATAL; - crm_crit("Cannot register watchdog pseudo fence agent: %s", - pcmk_rc_str(rc)); - stonith_shutdown(0); - } - } - - } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { - /* be silent if no device - todo parameter to stonith_device_remove */ - stonith_device_remove(STONITH_WATCHDOG_ID, true); - } -} - -static void -update_stonith_watchdog_timeout_ms(xmlNode *cib) -{ - long timeout_ms = 0; - xmlNode *stonith_watchdog_xml = NULL; - const char *value = NULL; - - stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", - cib, LOG_NEVER); - if (stonith_watchdog_xml) { - value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); - } - if (value) { - timeout_ms = crm_get_msec(value); - } - - if (timeout_ms < 0) { - timeout_ms = pcmk__auto_watchdog_timeout(); - } - - stonith_watchdog_timeout_ms = timeout_ms; -} - -/*! - * \internal - * \brief If a resource or any of its children are STONITH devices, update their - * definitions given a cluster working set. - * - * \param[in,out] rsc Resource to check - * \param[in,out] data_set Cluster working set with device information - */ -static void -cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) -{ - pe_node_t *node = NULL; - const char *value = NULL; - const char *rclass = NULL; - pe_node_t *parent = NULL; - - /* If this is a complex resource, check children rather than this resource itself. */ - if(rsc->children) { - GList *gIter = NULL; - for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { - cib_device_update(gIter->data, data_set); - if(pe_rsc_is_clone(rsc)) { - crm_trace("Only processing one copy of the clone %s", rsc->id); - break; - } - } - return; - } - - /* We only care about STONITH resources. 
*/ - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - return; - } - - /* If this STONITH resource is disabled, remove it. */ - if (pe__resource_is_disabled(rsc)) { - crm_info("Device %s has been disabled", rsc->id); - return; - } - - /* if watchdog-fencing is disabled handle any watchdog-fence - resource as if it was disabled - */ - if ((stonith_watchdog_timeout_ms <= 0) && - pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { - crm_info("Watchdog-fencing disabled thus handling " - "device %s as disabled", rsc->id); - return; - } - - /* Check whether our node is allowed for this resource (and its parent if in a group) */ - node = our_node_allowed_for(rsc); - if (rsc->parent && (rsc->parent->variant == pe_group)) { - parent = our_node_allowed_for(rsc->parent); - } - - if(node == NULL) { - /* Our node is disallowed, so remove the device */ - GHashTableIter iter; - - crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - crm_trace("Available: %s = %d", pe__node_name(node), node->weight); - } - - return; - - } else if(node->weight < 0 || (parent && parent->weight < 0)) { - /* Our node (or its group) is disallowed by score, so remove the device */ - int score = (node->weight < 0)? 
node->weight : parent->weight; - - crm_info("Device %s has been disabled on %s: score=%s", - rsc->id, stonith_our_uname, pcmk_readable_score(score)); - return; - - } else { - /* Our node is allowed, so update the device information */ - int rc; - xmlNode *data; - GHashTable *rsc_params = NULL; - GHashTableIter gIter; - stonith_key_value_t *params = NULL; - - const char *name = NULL; - const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); - const char *rsc_provides = NULL; - - crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); - rsc_params = pe_rsc_params(rsc, node, data_set); - get_meta_attributes(rsc->meta, rsc, node, data_set); - - rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); - - g_hash_table_iter_init(&gIter, rsc_params); - while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { - if (!name || !value) { - continue; - } - params = stonith_key_value_add(params, name, value); - crm_trace(" %s=%s", name, value); - } - - data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, - agent, params, rsc_provides); - stonith_key_value_freeall(params, 1, 1); - rc = stonith_device_register(data, TRUE); - CRM_ASSERT(rc == pcmk_ok); - free_xml(data); - } -} - -/*! 
- * \internal - * \brief Update all STONITH device definitions based on current CIB - */ -static void -cib_devices_update(void) -{ - GHashTableIter iter; - stonith_device_t *device = NULL; - - crm_info("Updating devices to version %s.%s.%s", - crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), - crm_element_value(local_cib, XML_ATTR_GENERATION), - crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); - - if (fenced_data_set->now != NULL) { - crm_time_free(fenced_data_set->now); - fenced_data_set->now = NULL; - } - fenced_data_set->localhost = stonith_our_uname; - pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->cib_registered) { - device->dirty = TRUE; - } - } - - /* have list repopulated if cib has a watchdog-fencing-resource - TODO: keep a cached list for queries happening while we are refreshing - */ - g_list_free_full(stonith_watchdog_targets, free); - stonith_watchdog_targets = NULL; - g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->dirty) { - g_hash_table_iter_remove(&iter); - } - } - - fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it - pe_reset_working_set(fenced_data_set); -} - -static void -update_cib_stonith_devices_v2(const char *event, xmlNode * msg) -{ - xmlNode *change = NULL; - char *reason = NULL; - bool needs_update = FALSE; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - const char *shortpath = NULL; - - if ((op == NULL) || - (strcmp(op, "move") == 0) || - 
strstr(xpath, "/"XML_CIB_TAG_STATUS)) { - continue; - } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { - const char *rsc_id = NULL; - char *search = NULL; - char *mutable = NULL; - - if (strstr(xpath, XML_TAG_ATTR_SETS) || - strstr(xpath, XML_TAG_META_SETS)) { - needs_update = TRUE; - pcmk__str_update(&reason, - "(meta) attribute deleted from resource"); - break; - } - pcmk__str_update(&mutable, xpath); - rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); - if (rsc_id != NULL) { - rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); - search = strchr(rsc_id, '\''); - } - if (search != NULL) { - *search = 0; - stonith_device_remove(rsc_id, true); - /* watchdog_device_update called afterwards - to fall back to implicit definition if needed */ - } else { - crm_warn("Ignoring malformed CIB update (resource deletion)"); - } - free(mutable); - - } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || - strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || - strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { - shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); - reason = crm_strdup_printf("%s %s", op, shortpath+1); - needs_update = TRUE; - break; - } - } - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } else { - crm_trace("No updates for device list found in CIB"); - } - free(reason); -} - - -static void -update_cib_stonith_devices_v1(const char *event, xmlNode * msg) -{ - const char *reason = "none"; - gboolean needs_update = FALSE; - xmlXPathObjectPtr xpath_obj = NULL; - - /* process new constraints */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - /* Safest and simplest to always recompute */ - needs_update = TRUE; - reason = "new location constraint"; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpath_obj, 
lpc); - - crm_log_xml_trace(match, "new constraint"); - } - } - freeXpathObject(xpath_obj); - - /* process deletions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - remove_cib_device(xpath_obj); - } - freeXpathObject(xpath_obj); - - /* process additions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpath_obj, lpc); - - rsc_id = crm_element_value(match, XML_ATTR_ID); - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - crm_trace("Fencing resource %s was added or modified", rsc_id); - reason = "new resource"; - needs_update = TRUE; - } - } - freeXpathObject(xpath_obj); - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } -} - -static void -update_cib_stonith_devices(const char *event, xmlNode * msg) -{ - int format = 1; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, "format", &format); - switch(format) { - case 1: - update_cib_stonith_devices_v1(event, msg); - break; - case 2: - update_cib_stonith_devices_v2(event, msg); - break; - default: - crm_warn("Unknown patch format: %d", format); - } -} - -/*! 
- * \internal - * \brief Check whether a node has a specific attribute name/value - * - * \param[in] node Name of node to check - * \param[in] name Name of an attribute to look for - * \param[in] value The value the named attribute needs to be set to in order to be considered a match - * - * \return TRUE if the locally cached CIB has the specified node attribute - */ -gboolean -node_has_attr(const char *node, const char *name, const char *value) -{ - GString *xpath = NULL; - xmlNode *match; - - CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) - && (value != NULL), return FALSE); - - /* Search for the node's attributes in the CIB. While the schema allows - * multiple sets of instance attributes, and allows instance attributes to - * use id-ref to reference values elsewhere, that is intended for resources, - * so we ignore that here. - */ - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, - "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE - "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS - "/" XML_CIB_TAG_NVPAIR - "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " - "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); - - match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); - - g_string_free(xpath, TRUE); - return (match != NULL); -} - /*! 
* \internal * \brief Check whether a node does watchdog-fencing @@ -1043,201 +448,7 @@ node_does_watchdog_fencing(const char *node) pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } - -static void -update_fencing_topology(const char *event, xmlNode * msg) -{ - int format = 1; - const char *xpath; - xmlXPathObjectPtr xpathObj = NULL; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, "format", &format); - - if(format == 1) { - /* Process deletions (only) */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - remove_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - /* Process additions and changes */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - register_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - } else if(format == 2) { - xmlNode *change = NULL; - int add[] = { 0, 0, 0 }; - int del[] = { 0, 0, 0 }; - - xml_patch_versions(patchset, add, del); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - - if(op == NULL) { - continue; - - } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { - /* Change to a specific entry */ - - crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); - if(strcmp(op, "move") == 0) { - continue; - - } else if(strcmp(op, "create") == 0) { - add_topology_level(change->children); - - } else if(strcmp(op, "modify") == 0) { - xmlNode *match = first_named_child(change, XML_DIFF_RESULT); - - if(match) { - remove_topology_level(match->children); - add_topology_level(match->children); - } - - } else if(strcmp(op, "delete") == 0) { - /* Nuclear option, all we 
have is the path and an id... not enough to remove a specific entry */ - crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { - /* Change to the topology in general */ - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - - } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { - /* Changes to the whole config section, possibly including the topology as a whild */ - if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - - } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - } - } - - } else { - crm_warn("Unknown patch format: %d", format); - } -} -static bool have_cib_devices = FALSE; - -static void -update_cib_cache_cb(const char *event, xmlNode * msg) -{ - int rc = pcmk_ok; - long timeout_ms_saved = stonith_watchdog_timeout_ms; - bool need_full_refresh = false; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a full dump"); - return; - - } else if(msg == NULL) { - crm_trace("Missing %s update", event); - return; - } - - /* Maintain a local copy of the CIB so that we have full access - * to device definitions, location constraints, and node attributes - */ - if (local_cib != NULL) { - int rc = pcmk_ok; - xmlNode *patchset = NULL; - - crm_element_value_int(msg, F_CIB_RC, &rc); - if (rc != pcmk_ok) { - return; - } - - patchset = 
get_message_xml(msg, F_CIB_UPDATE_RESULT); - pcmk__output_set_log_level(logger_out, LOG_TRACE); - out->message(out, "xml-patchset", patchset); - rc = xml_apply_patchset(local_cib, patchset, TRUE); - switch (rc) { - case pcmk_ok: - case -pcmk_err_old_data: - break; - case -pcmk_err_diff_resync: - case -pcmk_err_diff_failed: - crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - break; - default: - crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - } - } - - if (local_cib == NULL) { - crm_trace("Re-requesting full CIB"); - rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); - if(rc != pcmk_ok) { - crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); - return; - } - CRM_ASSERT(local_cib != NULL); - need_full_refresh = true; - } - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - if (timeout_ms_saved != stonith_watchdog_timeout_ms) { - need_full_refresh = true; - } - - if (need_full_refresh) { - fencing_topology_init(); - cib_devices_update(); - } else { - // Partial refresh - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - } - - watchdog_device_update(); -} - -static void -init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) -{ - crm_info("Updating device list from CIB"); - have_cib_devices = TRUE; - local_cib = copy_xml(output); - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - fencing_topology_init(); - cib_devices_update(); - watchdog_device_update(); -} - -static void +void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); @@ -1248,28 +459,9 @@ stonith_shutdown(int nsig) } static void -cib_connection_destroy(gpointer user_data) -{ - if (stonith_shutdown_flag) { - 
crm_info("Connection to the CIB manager closed"); - return; - } else { - crm_crit("Lost connection to the CIB manager, shutting down"); - } - if (cib_api) { - cib_api->cmds->signoff(cib_api); - } - stonith_shutdown(0); -} - -static void stonith_cleanup(void) { - if (cib_api) { - cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); - cib_api->cmds->signoff(cib_api); - } - + fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } @@ -1284,9 +476,6 @@ stonith_cleanup(void) free(stonith_our_uname); stonith_our_uname = NULL; - - free_xml(local_cib); - local_cib = NULL; } static gboolean @@ -1298,38 +487,6 @@ stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, return TRUE; } -static void -setup_cib(void) -{ - int rc, retries = 0; - - cib_api = cib_new(); - if (cib_api == NULL) { - crm_err("No connection to the CIB manager"); - return; - } - - do { - sleep(retries); - rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); - } while (rc == -ENOTCONN && ++retries < 5); - - if (rc != pcmk_ok) { - crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); - - } else if (pcmk_ok != - cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { - crm_err("Could not set CIB notification callback"); - - } else { - rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); - cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", - init_cib_cache_cb); - cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_info("Watching for fencing topology changes"); - } -} - struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, @@ -1435,10 +592,11 @@ static pcmk__cluster_option_t fencer_options[] = { "Then use this to specify the maximum number of actions can be performed in parallel on this device. 
-1 is unlimited.") }, { - "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL, - N_("Advanced use only: An alternate command to run instead of 'reboot'"), + "pcmk_reboot_action", NULL, "string", NULL, + PCMK_ACTION_REBOOT, NULL, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), N_("Some devices do not support the standard commands or may provide additional ones.\n" - "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") }, { "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, @@ -1454,10 +612,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.") }, { - "pcmk_off_action",NULL, "string", NULL, "off", NULL, - N_("Advanced use only: An alternate command to run instead of \'off\'"), + "pcmk_off_action", NULL, "string", NULL, + PCMK_ACTION_OFF, NULL, + N_("Advanced use only: An alternate command to run instead of \'off\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") }, { "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, @@ -1473,10 +632,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.") }, { - "pcmk_on_action",NULL, "string", NULL, "on", NULL, - N_("Advanced use only: An alternate command to run instead of 'on'"), + "pcmk_on_action", NULL, "string", NULL, + PCMK_ACTION_ON, NULL, + N_("Advanced use only: An alternate command to run instead of 'on'"), N_("Some devices do not support the standard commands or may provide additional ones." 
- "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") }, { "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, @@ -1492,10 +652,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.") }, { - "pcmk_list_action",NULL, "string", NULL, "list", NULL, - N_("Advanced use only: An alternate command to run instead of \'list\'"), + "pcmk_list_action",NULL, "string", NULL, + PCMK_ACTION_LIST, NULL, + N_("Advanced use only: An alternate command to run instead of \'list\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") }, { "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, @@ -1511,7 +672,8 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.") }, { - "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL, + "pcmk_monitor_action", NULL, "string", NULL, + PCMK_ACTION_MONITOR, NULL, N_("Advanced use only: An alternate command to run instead of \'monitor\'"), N_("Some devices do not support the standard commands or may provide additional ones." 
"Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") @@ -1530,10 +692,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.") }, { - "pcmk_status_action",NULL, "string", NULL, "status", NULL, - N_("Advanced use only: An alternate command to run instead of \'status\'"), + "pcmk_status_action", NULL, "string", NULL, + PCMK_ACTION_STATUS, NULL, + N_("Advanced use only: An alternate command to run instead of \'status\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") }, { "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, @@ -1568,13 +731,13 @@ fencer_metadata(void) static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, - "Deprecated (will be removed in a future release)", NULL }, + N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, - stand_alone_cpg_cb, "Intended for use in regression testing only", NULL }, + stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, - &options.log_files, "Send logs to the additional named logfile", NULL }, + &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; @@ -1649,7 +812,7 @@ main(int argc, char **argv) goto done; } - if (crm_ipc_connect(old_instance)) { + if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); @@ -1665,26 +828,15 @@ main(int argc, char **argv) crm_peer_init(); - 
fenced_data_set = pe_new_working_set(); - CRM_ASSERT(fenced_data_set != NULL); - - cluster = pcmk_cluster_new(); - - /* Initialize the logger prior to setup_cib(). update_cib_cache_cb() may - * call the "xml-patchset" message function, which needs the logger, after - * setup_cib() has run. - */ - rc = pcmk__log_output_new(&logger_out) != pcmk_rc_ok; + rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, - "Error creating output format log: %s", pcmk_rc_str(rc)); + "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } - pe__register_messages(logger_out); - pcmk__register_lib_messages(logger_out); - pcmk__output_set_log_level(logger_out, LOG_TRACE); - fenced_data_set->priv = logger_out; + + cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC @@ -1732,15 +884,10 @@ done: stonith_cleanup(); pcmk_cluster_free(cluster); - pe_free_working_set(fenced_data_set); + fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); - if (logger_out != NULL) { - logger_out->finish(logger_out, exit_code, true, NULL); - pcmk__output_free(logger_out); - } - if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index a3d2e17..220978a 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -6,7 +6,12 @@ */ #include <stdint.h> // uint32_t, uint64_t +#include <libxml/tree.h> // xmlNode + #include <crm/common/mainloop.h> +#include <crm/cluster.h> +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> /*! * \internal @@ -104,9 +109,12 @@ typedef struct remote_fencing_op_s { * values associated with the devices this fencing operation may call */ gint total_timeout; - /*! Requested fencing delay. - * Value -1 means disable any static/random fencing delays. */ - int delay; + /*! 
+ * Fencing delay (in seconds) requested by API client (used by controller to + * implement priority-fencing-delay). A value of -1 means disable all + * configured delays. + */ + int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations @@ -205,6 +213,8 @@ typedef struct stonith_topology_s { } stonith_topology_t; +void stonith_shutdown(int nsig); + void init_device_list(void); void free_device_list(void); void init_topology_list(void); @@ -231,7 +241,7 @@ void fenced_unregister_level(xmlNode *msg, char **desc, stonith_topology_t *find_topology_for_host(const char *host); -void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, +void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, @@ -280,6 +290,14 @@ gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); +void fencing_topology_init(void); +void setup_cib(void); +void fenced_cib_cleanup(void); + +int fenced_scheduler_init(void); +void fenced_scheduler_cleanup(void); +void fenced_scheduler_run(xmlNode *cib); + static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { @@ -299,7 +317,7 @@ fenced_set_protocol_error(pcmk__action_result_t *result) static inline uint32_t fenced_support_flag(const char *action) { - if (pcmk__str_eq(action, "on", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; @@ -311,5 +329,6 @@ extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; - extern GHashTable *stonith_remote_op_list; +extern crm_exit_t exit_code; +extern gboolean stonith_shutdown_flag; diff --git a/daemons/pacemakerd/Makefile.am 
b/daemons/pacemakerd/Makefile.am index fc0e014..78e7c37 100644 --- a/daemons/pacemakerd/Makefile.am +++ b/daemons/pacemakerd/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2004-2021 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -25,8 +25,10 @@ noinst_HEADERS = pacemakerd.h pacemakerd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemakerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la -pacemakerd_LDADD += $(CLUSTERLIBS) +pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la +pacemakerd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemakerd_LDADD += $(CLUSTERLIBS) + pacemakerd_SOURCES = pacemakerd.c if BUILD_CS_SUPPORT pacemakerd_SOURCES += pcmkd_corosync.c diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 9f77ccc..365b743 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -92,7 +92,7 @@ pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **er static gboolean standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.standby = TRUE; - pcmk__set_env_option("node_start_state", "standby"); + pcmk__set_env_option(PCMK__ENV_NODE_START_STATE, "standby", false); return TRUE; } @@ -129,7 +129,7 @@ pcmk_sigquit(int nsig) } static void -mcp_chown(const char *path, uid_t uid, gid_t gid) +pacemakerd_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); @@ -166,7 +166,7 @@ create_pcmk_dirs(void) crm_warn("Could not create directory " CRM_STATE_DIR ": %s", pcmk_rc_str(errno)); } else { - mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); + pacemakerd_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); } for (int i = 0; dirs[i] != NULL; ++i) { @@ -176,7 +176,7 @@ create_pcmk_dirs(void) crm_warn("Could not create directory 
%s: %s", dirs[i], pcmk_rc_str(rc)); } else { - mcp_chown(dirs[i], pcmk_uid, pcmk_gid); + pacemakerd_chown(dirs[i], pcmk_uid, pcmk_gid); } } } @@ -312,7 +312,8 @@ main(int argc, char **argv) goto done; } - pcmk__set_env_option("mcp", "true"); + // @COMPAT Drop at 3.0.0; likely last used in 1.1.24 + pcmk__set_env_option(PCMK__ENV_MCP, "true", true); if (options.shutdown) { pcmk__cli_init_logging("pacemakerd", args->verbosity); @@ -330,7 +331,11 @@ main(int argc, char **argv) } pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL); - rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); + if (rc != pcmk_rc_ok) { + crm_debug("No existing %s instance found: %s", + pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); + } old_instance_connected = pcmk_ipc_is_connected(old_instance); if (options.shutdown) { @@ -388,7 +393,7 @@ main(int argc, char **argv) } #ifdef SUPPORT_COROSYNC - if (mcp_read_config() == FALSE) { + if (pacemakerd_read_config() == FALSE) { crm_exit(CRM_EX_UNAVAILABLE); } #endif @@ -399,7 +404,7 @@ main(int argc, char **argv) if (!pcmk__str_eq(facility, PCMK__VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { - setenv("HA_LOGFACILITY", facility, 1); + pcmk__set_env_option("LOGFACILITY", facility, true); } } @@ -409,7 +414,7 @@ main(int argc, char **argv) remove_core_file_limit(); create_pcmk_dirs(); - pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks); + pcmk__serve_pacemakerd_ipc(&ipcs, &pacemakerd_ipc_callbacks); #ifdef SUPPORT_COROSYNC /* Allows us to block shutdown */ @@ -420,10 +425,7 @@ main(int argc, char **argv) #endif if (pcmk__locate_sbd() > 0) { - setenv("PCMK_watchdog", "true", 1); running_with_sbd = TRUE; - } else { - setenv("PCMK_watchdog", "false", 1); } switch (find_and_track_existing_processes()) { diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h index b2a6864..ee6facf 100644 --- a/daemons/pacemakerd/pacemakerd.h +++ 
b/daemons/pacemakerd/pacemakerd.h @@ -1,5 +1,5 @@ /* - * Copyright 2010-2022 the Pacemaker project contributors + * Copyright 2010-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -14,7 +14,7 @@ #define MAX_RESPAWN 100 extern GMainLoop *mainloop; -extern struct qb_ipcs_service_handlers mcp_ipc_callbacks; +extern struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks; extern const char *pacemakerd_state; extern gboolean running_with_sbd; extern unsigned int shutdown_complete_state_reported_to; @@ -23,7 +23,7 @@ extern crm_trigger_t *shutdown_trigger; extern crm_trigger_t *startup_trigger; extern time_t subdaemon_check_progress; -gboolean mcp_read_config(void); +gboolean pacemakerd_read_config(void); gboolean cluster_connect_cfg(void); void cluster_disconnect_cfg(void); diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c index 2648756..8a1a867 100644 --- a/daemons/pacemakerd/pcmkd_corosync.c +++ b/daemons/pacemakerd/pcmkd_corosync.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2022 the Pacemaker project contributors + * Copyright 2010-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -82,7 +82,7 @@ cluster_reconnect_cb(gpointer data) mainloop_timer_del(reconnect_timer); reconnect_timer = NULL; crm_notice("Cluster reconnect succeeded"); - mcp_read_config(); + pacemakerd_read_config(); restart_cluster_subdaemons(); return G_SOURCE_REMOVE; } else { @@ -260,7 +260,7 @@ get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, ch } gboolean -mcp_read_config(void) +pacemakerd_read_config(void) { cs_error_t rc = CS_OK; int retries = 0; @@ -327,8 +327,10 @@ mcp_read_config(void) crm_info("Reading configuration for %s stack", name_for_cluster_type(stack)); - pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync"); - pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync"); + pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync", true); + + // @COMPAT Drop at 3.0.0; added unused in 1.1.9 + pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync", true); // If debug logging is not configured, check whether corosync has it if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) { @@ -337,13 +339,13 @@ mcp_read_config(void) get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off"); if (crm_is_true(debug_enabled)) { - pcmk__set_env_option(PCMK__ENV_DEBUG, "1"); + pcmk__set_env_option(PCMK__ENV_DEBUG, "1", true); if (get_crm_log_level() < LOG_DEBUG) { set_crm_log_level(LOG_DEBUG); } } else { - pcmk__set_env_option(PCMK__ENV_DEBUG, "0"); + pcmk__set_env_option(PCMK__ENV_DEBUG, "0", true); } free(debug_enabled); diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c index 7ed9899..4e6f822 100644 --- a/daemons/pacemakerd/pcmkd_messages.c +++ b/daemons/pacemakerd/pcmkd_messages.c @@ -269,7 +269,7 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) return 0; } -struct qb_ipcs_service_handlers mcp_ipc_callbacks = { +struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = NULL, .msg_process = pcmk_ipc_dispatch, 
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index 3b08ecc..21e432e 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2022 the Pacemaker project contributors + * Copyright 2010-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -307,7 +307,7 @@ pcmk_process_exit(pcmk_child_t * child) } else if (!child->respawn) { /* nothing to do */ - } else if (crm_is_true(getenv("PCMK_fail_fast"))) { + } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_err("Rebooting system because of %s", child->name); pcmk__panic(__func__); @@ -353,8 +353,8 @@ pcmk_shutdown_worker(gpointer user_data) " if it vitally depends on some other daemons" " going down in a controlled way already," " or locate and kill the correct %s process" - " on your own; set PCMK_fail_fast=1 to avoid" - " this altogether next time around", + " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1" + " to avoid this altogether next time around", child->name, (long) SHUTDOWN_ESCALATION_PERIOD, child->command); } @@ -389,6 +389,7 @@ pcmk_shutdown_worker(gpointer user_data) return TRUE; } + // @COMPAT Drop shutdown delay at 3.0.0 { const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY); if(delay) { @@ -423,8 +424,8 @@ start_child(pcmk_child_t * child) gid_t gid = 0; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; - const char *env_valgrind = getenv("PCMK_valgrind_enabled"); - const char *env_callgrind = getenv("PCMK_callgrind_enabled"); + const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); + const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->active_before_startup = false; child->check_count = 0; @@ -712,14 +713,16 @@ find_and_track_existing_processes(void) continue; } + // @TODO Functionize more of this to reduce nesting 
pcmk_children[i].respawn_count = rounds; switch (rc) { case pcmk_rc_ok: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { - if (crm_is_true(getenv("PCMK_fail_fast"))) { + if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_crit("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" - " platform and PCMK_fail_fast requested", + " platform and PCMK_" PCMK__ENV_FAIL_FAST + " requested", pcmk_children[i].endpoint); return EOPNOTSUPP; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { @@ -727,9 +730,9 @@ find_and_track_existing_processes(void) " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" - " bailing out (PCMK_fail_fast not" - " requested), we just switch to a less" - " optimal IPC liveness monitoring" + " bailing out (PCMK_" PCMK__ENV_FAIL_FAST + " not requested), we just switch to a" + " less optimal IPC liveness monitoring" " (not very suitable for heavy load)", pcmk_children[i].name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" @@ -822,7 +825,7 @@ init_children_processes(void *user_data) * * This may be useful for the daemons to know */ - setenv("PCMK_respawned", "true", 1); + pcmk__set_env_option(PCMK__ENV_RESPAWNED, "true", false); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; return TRUE; } diff --git a/daemons/schedulerd/Makefile.am b/daemons/schedulerd/Makefile.am index 57e819b..fab8e1a 100644 --- a/daemons/schedulerd/Makefile.am +++ b/daemons/schedulerd/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2004-2021 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
# @@ -10,7 +10,8 @@ include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/man.mk -AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) +AM_CPPFLAGS += -I$(top_builddir) \ + -I$(top_srcdir) halibdir = $(CRM_DAEMON_DIR) @@ -26,27 +27,34 @@ endif noinst_HEADERS = pacemaker-schedulerd.h -pacemaker_schedulerd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_schedulerd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_schedulerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_schedulerd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/pengine/libpe_status.la \ - $(top_builddir)/lib/pacemaker/libpacemaker.la +pacemaker_schedulerd_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la +pacemaker_schedulerd_LDADD += $(top_builddir)/lib/pengine/libpe_status.la +pacemaker_schedulerd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la + # libcib for get_object_root() pacemaker_schedulerd_SOURCES = pacemaker-schedulerd.c pacemaker_schedulerd_SOURCES += schedulerd_messages.c +.PHONY: install-exec-local install-exec-local: $(INSTALL) -d -m 750 $(DESTDIR)/$(PE_STATE_DIR) -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/$(PE_STATE_DIR) -if BUILD_LEGACY_LINKS +.PHONY: install-exec-hook install-exec-hook: +if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine && $(LN_S) pacemaker-schedulerd pengine +endif +.PHONY: uninstall-hook uninstall-hook: +if BUILD_LEGACY_LINKS cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine endif +.PHONY: uninstall-local uninstall-local: -rmdir $(DESTDIR)/$(PE_STATE_DIR) diff --git a/daemons/schedulerd/pacemaker-schedulerd.h b/daemons/schedulerd/pacemaker-schedulerd.h index cbb07e1..75b7d38 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.h +++ b/daemons/schedulerd/pacemaker-schedulerd.h @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -11,7 +11,7 @@ #define PCMK__PACEMAKER_SCHEDULERD__H #include <crm_internal.h> -#include <crm/pengine/pe_types.h> +#include <crm/common/scheduler.h> extern pcmk__output_t *logger_out; extern pcmk__output_t *out; diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c index 1c124d2..5a97365 100644 --- a/daemons/schedulerd/schedulerd_messages.c +++ b/daemons/schedulerd/schedulerd_messages.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -22,12 +22,12 @@ static GHashTable *schedulerd_handlers = NULL; -static pe_working_set_t * +static pcmk_scheduler_t * init_working_set(void) { - pe_working_set_t *data_set = pe_new_working_set(); + pcmk_scheduler_t *scheduler = pe_new_working_set(); - CRM_ASSERT(data_set != NULL); + CRM_ASSERT(scheduler != NULL); crm_config_error = FALSE; crm_config_warning = FALSE; @@ -35,8 +35,8 @@ init_working_set(void) was_processing_error = FALSE; was_processing_warning = FALSE; - data_set->priv = logger_out; - return data_set; + scheduler->priv = logger_out; + return scheduler; } static xmlNode * @@ -72,7 +72,7 @@ handle_pecalc_request(pcmk__request_t *request) xmlNode *reply = NULL; bool is_repoke = false; bool process = true; - pe_working_set_t *data_set = init_working_set(); + pcmk_scheduler_t *scheduler = init_working_set(); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, "ack", NULL, CRM_EX_INDETERMINATE); @@ -81,9 +81,9 @@ handle_pecalc_request(pcmk__request_t *request) CRM_FEATURE_SET); converted = copy_xml(xml_data); if (!cli_config_update(&converted, NULL, TRUE)) { - data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); - crm_xml_add_int(data_set->graph, "transition_id", 0); - crm_xml_add_int(data_set->graph, "cluster-delay", 0); + scheduler->graph = create_xml_node(NULL, XML_TAG_GRAPH); + 
crm_xml_add_int(scheduler->graph, "transition_id", 0); + crm_xml_add_int(scheduler->graph, "cluster-delay", 0); process = false; free(digest); @@ -98,9 +98,9 @@ handle_pecalc_request(pcmk__request_t *request) if (process) { pcmk__schedule_actions(converted, - pe_flag_no_counts - |pe_flag_no_compat - |pe_flag_show_utilization, data_set); + pcmk_sched_no_counts + |pcmk_sched_no_compat + |pcmk_sched_show_utilization, scheduler); } // Get appropriate index into series[] array @@ -112,7 +112,7 @@ handle_pecalc_request(pcmk__request_t *request) series_id = 2; } - value = pe_pref(data_set->config_hash, series[series_id].param); + value = pe_pref(scheduler->config_hash, series[series_id].param); if ((value == NULL) || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { series_wrap = series[series_id].wrap; @@ -126,8 +126,8 @@ handle_pecalc_request(pcmk__request_t *request) crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", series[series_id].name, series_wrap, seq, value); - data_set->input = NULL; - reply = create_reply(msg, data_set->graph); + scheduler->input = NULL; + reply = create_reply(msg, scheduler->graph); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, @@ -172,7 +172,7 @@ handle_pecalc_request(pcmk__request_t *request) done: free_xml(converted); - pe_free_working_set(data_set); + pe_free_working_set(scheduler); return reply; } |