diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
commit | e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch) | |
tree | a6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/attrd | |
parent | Initial commit. (diff) | |
download | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip |
Adding upstream version 2.1.6.upstream/2.1.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/attrd')
-rw-r--r-- | daemons/attrd/Makefile.am | 48 | ||||
-rw-r--r-- | daemons/attrd/attrd_alerts.c | 145 | ||||
-rw-r--r-- | daemons/attrd/attrd_attributes.c | 188 | ||||
-rw-r--r-- | daemons/attrd/attrd_cib.c | 380 | ||||
-rw-r--r-- | daemons/attrd/attrd_corosync.c | 620 | ||||
-rw-r--r-- | daemons/attrd/attrd_elections.c | 179 | ||||
-rw-r--r-- | daemons/attrd/attrd_ipc.c | 628 | ||||
-rw-r--r-- | daemons/attrd/attrd_messages.c | 328 | ||||
-rw-r--r-- | daemons/attrd/attrd_sync.c | 577 | ||||
-rw-r--r-- | daemons/attrd/attrd_utils.c | 362 | ||||
-rw-r--r-- | daemons/attrd/pacemaker-attrd.c | 358 | ||||
-rw-r--r-- | daemons/attrd/pacemaker-attrd.h | 216 |
12 files changed, 4029 insertions, 0 deletions
diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am new file mode 100644 index 0000000..6bb81c4 --- /dev/null +++ b/daemons/attrd/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright 2004-2022 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/common.mk + +halibdir = $(CRM_DAEMON_DIR) + +halib_PROGRAMS = pacemaker-attrd + +noinst_HEADERS = pacemaker-attrd.h + +pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE) +pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + +pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/pengine/libpe_rules.la \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ + $(CLUSTERLIBS) + +pacemaker_attrd_SOURCES = attrd_alerts.c \ + attrd_attributes.c \ + attrd_cib.c \ + attrd_corosync.c \ + attrd_elections.c \ + attrd_ipc.c \ + attrd_messages.c \ + attrd_sync.c \ + attrd_utils.c \ + pacemaker-attrd.c + +clean-generic: + rm -f *.log *.debug *.xml *~ + +if BUILD_LEGACY_LINKS +install-exec-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd && $(LN_S) pacemaker-attrd attrd + +uninstall-hook: + cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd +endif diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c new file mode 100644 index 0000000..b694891 --- /dev/null +++ b/daemons/attrd/attrd_alerts.c @@ -0,0 +1,145 @@ +/* + * Copyright 2015-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/crm.h> +#include <crm/cib/internal.h> +#include <crm/msg_xml.h> +#include <crm/cluster/internal.h> +#include <crm/cluster/election_internal.h> +#include <crm/common/alerts_internal.h> +#include <crm/pengine/rules_internal.h> +#include <crm/lrmd_internal.h> +#include "pacemaker-attrd.h" + +static GList *attrd_alert_list = NULL; + +static void +attrd_lrmd_callback(lrmd_event_data_t * op) +{ + CRM_CHECK(op != NULL, return); + switch (op->type) { + case lrmd_event_disconnect: + crm_info("Lost connection to executor"); + attrd_lrmd_disconnect(); + break; + default: + break; + } +} + +static lrmd_t * +attrd_lrmd_connect(void) +{ + if (the_lrmd == NULL) { + the_lrmd = lrmd_api_new(); + the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback); + } + + if (!the_lrmd->cmds->is_connected(the_lrmd)) { + const unsigned int max_attempts = 10; + int ret = -ENOTCONN; + + for (int fails = 0; fails < max_attempts; ++fails) { + ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); + if (ret == pcmk_ok) { + break; + } + + crm_debug("Could not connect to executor, %d tries remaining", + (max_attempts - fails)); + /* @TODO We don't want to block here with sleep, but we should wait + * some time between connection attempts. We could possibly add a + * timer with a callback, but then we'd likely need an alert queue. + */ + } + + if (ret != pcmk_ok) { + attrd_lrmd_disconnect(); + } + } + + return the_lrmd; +} + +void +attrd_lrmd_disconnect(void) { + if (the_lrmd) { + lrmd_t *conn = the_lrmd; + + the_lrmd = NULL; /* in case we're called recursively */ + lrmd_api_delete(conn); /* will disconnect if necessary */ + } +} + +static void +config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + xmlNode *crmalerts = NULL; + + if (rc == -ENXIO) { + crm_debug("Local CIB has no alerts section"); + return; + } else if (rc != pcmk_ok) { + crm_notice("Could not query local CIB: %s", pcmk_strerror(rc)); + return; + } + + crmalerts = output; + if (crmalerts && !pcmk__str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, pcmk__str_none)) { + crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); + } + if (!crmalerts) { + crm_notice("CIB query result has no " XML_CIB_TAG_ALERTS " section"); + return; + } + + pe_free_alert_list(attrd_alert_list); + attrd_alert_list = pe_unpack_alerts(crmalerts); +} + +#define XPATH_ALERTS \ + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS + +gboolean +attrd_read_options(gpointer user_data) +{ + int call_id; + + CRM_CHECK(the_cib != NULL, return TRUE); + + call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL, + cib_xpath | cib_scope_local); + + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, + "config_query_callback", + config_query_callback, free); + + crm_trace("Querying the CIB... call %d", call_id); + return TRUE; +} + +void +attrd_cib_updated_cb(const char *event, xmlNode * msg) +{ + if (!attrd_shutting_down() && pcmk__alert_in_patchset(msg, false)) { + mainloop_set_trigger(attrd_config_read); + } +} + +int +attrd_send_attribute_alert(const char *node, int nodeid, + const char *attr, const char *value) +{ + if (attrd_alert_list == NULL) { + return pcmk_ok; + } + return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list, + node, nodeid, attr, value); +} diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c new file mode 100644 index 0000000..516ced7 --- /dev/null +++ b/daemons/attrd/attrd_attributes.c @@ -0,0 +1,188 @@ +/* + * Copyright 2013-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> +#include <glib.h> + +#include <crm/msg_xml.h> +#include <crm/common/logging.h> +#include <crm/common/results.h> +#include <crm/common/strings_internal.h> +#include <crm/common/xml.h> + +#include "pacemaker-attrd.h" + +static attribute_t * +attrd_create_attribute(xmlNode *xml) +{ + int dampen = 0; + const char *value = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); + attribute_t *a = calloc(1, sizeof(attribute_t)); + + CRM_ASSERT(a != NULL); + + a->id = crm_element_value_copy(xml, PCMK__XA_ATTR_NAME); + a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET); + a->set_type = crm_element_value_copy(xml, PCMK__XA_ATTR_SET_TYPE); + a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID); + a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); + + crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &a->is_private); + + a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER); + crm_trace("Performing all %s operations as user '%s'", a->id, a->user); + + if (value != NULL) { + dampen = crm_get_msec(value); + } + crm_trace("Created attribute %s with %s write delay", a->id, + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + + if(dampen > 0) { + a->timeout_ms = dampen; + a->timer = attrd_add_timer(a->id, a->timeout_ms, a); + } else if (dampen < 0) { + crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); + } + + g_hash_table_replace(attributes, a->id, a); + return a; +} + +static int +attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr) +{ + const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); + int dampen = 0; + + if (dvalue == NULL) { + crm_warn("Could not update %s: peer did not specify value for delay", + attr); + return EINVAL; + } + + dampen = crm_get_msec(dvalue); + if (dampen < 0) { + crm_warn("Could not update %s: invalid delay value %dms (%s)", + attr, dampen, dvalue); + return EINVAL; + } + + if (a->timeout_ms != dampen) { + mainloop_timer_del(a->timer); + a->timeout_ms = dampen; + if (dampen > 0) { + a->timer = attrd_add_timer(attr, a->timeout_ms, a); + crm_info("Update attribute %s delay to %dms (%s)", + attr, dampen, dvalue); + } else { + a->timer = NULL; + crm_info("Update attribute %s to remove delay", attr); + } + + /* If dampening changed, do an immediate write-out, + * otherwise repeated dampening changes would prevent write-outs + */ + attrd_write_or_elect_attribute(a); + } + + return pcmk_rc_ok; +} + +GHashTable *attributes = NULL; + +/*! + * \internal + * \brief Create an XML representation of an attribute for use in peer messages + * + * \param[in,out] parent Create attribute XML as child element of this + * \param[in] a Attribute to represent + * \param[in] v Attribute value to represent + * \param[in] force_write If true, value should be written even if unchanged + * + * \return XML representation of attribute + */ +xmlNode * +attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write) +{ + xmlNode *xml = create_xml_node(parent, __func__); + + crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); + crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); + crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); + crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); + pcmk__xe_add_node(xml, v->nodename, v->nodeid); + if (v->is_remote != 0) { + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); + } + crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); + crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000); + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private); + crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write); + + return xml; +} + +void +attrd_clear_value_seen(void) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a; + attribute_value_t *v = NULL; + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + v->seen = FALSE; + crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); + } + } +} + +attribute_t * +attrd_populate_attribute(xmlNode *xml, const char *attr) +{ + attribute_t *a = NULL; + bool update_both = false; + + const char *op = crm_element_value(xml, PCMK__XA_TASK); + + // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK__XA_TASK + update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH, + pcmk__str_null_matches); + + // Look up or create attribute entry + a = g_hash_table_lookup(attributes, attr); + if (a == NULL) { + if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) { + a = attrd_create_attribute(xml); + } else { + crm_warn("Could not update %s: attribute not found", attr); + return NULL; + } + } + + // Update attribute dampening + if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { + int rc = attrd_update_dampening(a, xml, attr); + + if (rc != pcmk_rc_ok || !update_both) { + return NULL; + } + } + + return a; +} diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c new file mode 100644 index 0000000..928c013 --- /dev/null +++ b/daemons/attrd/attrd_cib.c @@ -0,0 +1,380 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> +#include <glib.h> + +#include <crm/msg_xml.h> +#include <crm/common/logging.h> +#include <crm/common/results.h> +#include <crm/common/strings_internal.h> +#include <crm/common/xml.h> + +#include "pacemaker-attrd.h" + +static int last_cib_op_done = 0; + +static gboolean +attribute_timer_cb(gpointer data) +{ + attribute_t *a = data; + crm_trace("Dampen interval expired for %s", a->id); + attrd_write_or_elect_attribute(a); + return FALSE; +} + +static void +attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) +{ + int level = LOG_ERR; + GHashTableIter iter; + const char *peer = NULL; + attribute_value_t *v = NULL; + + char *name = user_data; + attribute_t *a = g_hash_table_lookup(attributes, name); + + if(a == NULL) { + crm_info("Attribute %s no longer exists", name); + return; + } + + a->update = 0; + if (rc == pcmk_ok && call_id < 0) { + rc = call_id; + } + + switch (rc) { + case pcmk_ok: + level = LOG_INFO; + last_cib_op_done = call_id; + if (a->timer && !a->timeout_ms) { + // Remove temporary dampening for failed writes + mainloop_timer_del(a->timer); + a->timer = NULL; + } + break; + + case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ + case -ETIME: /* When an attr changes while there is a DC election */ + case -ENXIO: /* When an attr changes while the CIB is syncing a + * newer config from a node that just came up + */ + level = LOG_WARNING; + break; + } + + do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d", + call_id, a->id, pcmk_strerror(rc), rc); + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { + do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); + free(v->requested); + v->requested = NULL; + if (rc != pcmk_ok) { + a->changed = true; /* Attempt write out again */ + } + } + + if (a->changed && attrd_election_won()) { + if (rc == pcmk_ok) { + /* We deferred a write of a new update because this update was in + * progress. Write out the new value without additional delay. + */ + attrd_write_attribute(a, false); + + /* We're re-attempting a write because the original failed; delay + * the next attempt so we don't potentially flood the CIB manager + * and logs with a zillion attempts per second. + * + * @TODO We could elect a new writer instead. However, we'd have to + * somehow downgrade our vote, and we'd still need something like this + * if all peers similarly fail to write this attribute (which may + * indicate a corrupted attribute entry rather than a CIB issue). + */ + } else if (a->timer) { + // Attribute has a dampening value, so use that as delay + if (!mainloop_timer_running(a->timer)) { + crm_trace("Delayed re-attempted write for %s by %s", + name, pcmk__readable_interval(a->timeout_ms)); + mainloop_timer_start(a->timer); + } + } else { + /* Set a temporary dampening of 2 seconds (timer will continue + * to exist until the attribute's dampening gets set or the + * write succeeds). + */ + a->timer = attrd_add_timer(a->id, 2000, a); + mainloop_timer_start(a->timer); + } + } +} + +static void +build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) +{ + const char *set = NULL; + xmlNode *xml_obj = NULL; + + xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); + crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + + xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); + crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); + + if (pcmk__str_eq(a->set_type, XML_TAG_ATTR_SETS, pcmk__str_null_matches)) { + xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); + } else if (pcmk__str_eq(a->set_type, XML_TAG_UTILIZATION, pcmk__str_none)) { + xml_obj = create_xml_node(xml_obj, XML_TAG_UTILIZATION); + } else { + crm_err("Unknown set type attribute: %s", a->set_type); + } + + if (a->set_id) { + crm_xml_set_id(xml_obj, "%s", a->set_id); + } else { + crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid); + } + set = ID(xml_obj); + + xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); + if (a->uuid) { + crm_xml_set_id(xml_obj, "%s", a->uuid); + } else { + crm_xml_set_id(xml_obj, "%s-%s", set, a->id); + } + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); + + if(value) { + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); + + } else { + crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); + crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); + } +} + +static void +send_alert_attributes_value(attribute_t *a, GHashTable *t) +{ + int rc = 0; + attribute_value_t *at = NULL; + GHashTableIter vIter; + + g_hash_table_iter_init(&vIter, t); + + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { + rc = attrd_send_attribute_alert(at->nodename, at->nodeid, + a->id, at->current); + crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", + a->id, at->nodename, at->current, at->nodeid, rc); + } +} + +static void +set_alert_attribute_value(GHashTable *t, attribute_value_t *v) +{ + attribute_value_t *a_v = NULL; + a_v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(a_v != NULL); + + a_v->nodeid = v->nodeid; + a_v->nodename = strdup(v->nodename); + pcmk__str_update(&a_v->current, v->current); + + g_hash_table_replace(t, a_v->nodename, a_v); +} + +mainloop_timer_t * +attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) +{ + return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); +} + +void +attrd_write_attribute(attribute_t *a, bool ignore_delay) +{ + int private_updates = 0, cib_updates = 0; + xmlNode *xml_top = NULL; + attribute_value_t *v = NULL; + GHashTableIter iter; + enum cib_call_options flags = cib_none; + GHashTable *alert_attribute_value = NULL; + + if (a == NULL) { + return; + } + + /* If this attribute will be written to the CIB ... */ + if (!stand_alone && !a->is_private) { + + /* Defer the write if now's not a good time */ + CRM_CHECK(the_cib != NULL, return); + if (a->update && (a->update < last_cib_op_done)) { + crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); + a->update = 0; // Don't log this message again + + } else if (a->update) { + crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); + return; + + } else if (mainloop_timer_running(a->timer)) { + if (ignore_delay) { + /* 'refresh' forces a write of the current value of all attributes + * Cancel any existing timers, we're writing it NOW + */ + mainloop_timer_stop(a->timer); + crm_debug("Write out of '%s': timer is running but ignore delay", a->id); + } else { + crm_info("Write out of '%s' delayed: timer is running", a->id); + return; + } + } + + /* Initialize the status update XML */ + xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); + } + + /* Attribute will be written shortly, so clear changed flag */ + a->changed = false; + + /* We will check all peers' uuids shortly, so initialize this to false */ + a->unknown_peer_uuids = false; + + /* Attribute will be written shortly, so clear forced write flag */ + a->force_write = FALSE; + + /* Make the table for the attribute trap */ + alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); + + /* Iterate over each peer value of this attribute */ + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { + crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); + + /* If the value's peer info does not correspond to a peer, ignore it */ + if (peer == NULL) { + crm_notice("Cannot update %s[%s]=%s because peer not known", + a->id, v->nodename, v->current); + continue; + } + + /* If we're just learning the peer's node id, remember it */ + if (peer->id && (v->nodeid == 0)) { + crm_trace("Learned ID %u for node %s", peer->id, v->nodename); + v->nodeid = peer->id; + } + + /* If this is a private attribute, no update needs to be sent */ + if (stand_alone || a->is_private) { + private_updates++; + continue; + } + + /* If the peer is found, but its uuid is unknown, defer write */ + if (peer->uuid == NULL) { + a->unknown_peer_uuids = true; + crm_notice("Cannot update %s[%s]=%s because peer UUID not known " + "(will retry if learned)", + a->id, v->nodename, v->current); + continue; + } + + /* Add this value to status update XML */ + crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID %u/%u)", + a->id, v->nodename, v->current, + peer->uname, peer->uuid, peer->id, v->nodeid); + build_update_element(xml_top, a, peer->uuid, v->current); + cib_updates++; + + /* Preservation of the attribute to transmit alert */ + set_alert_attribute_value(alert_attribute_value, v); + + free(v->requested); + v->requested = NULL; + if (v->current) { + v->requested = strdup(v->current); + } else { + /* Older attrd versions don't know about the cib_mixed_update + * flag so make sure it goes to the local cib which does + */ + cib__set_call_options(flags, crm_system_name, + cib_mixed_update|cib_scope_local); + } + } + + if (private_updates) { + crm_info("Processed %d private change%s for %s, id=%s, set=%s", + private_updates, pcmk__plural_s(private_updates), + a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); + } + if (cib_updates) { + crm_log_xml_trace(xml_top, __func__); + + a->update = cib_internal_op(the_cib, PCMK__CIB_REQUEST_MODIFY, NULL, + XML_CIB_TAG_STATUS, xml_top, NULL, flags, + a->user); + + crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", + a->update, cib_updates, pcmk__plural_s(cib_updates), + a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); + + the_cib->cmds->register_callback_full(the_cib, a->update, + CIB_OP_TIMEOUT_S, FALSE, + strdup(a->id), + "attrd_cib_callback", + attrd_cib_callback, free); + /* Transmit alert of the attribute */ + send_alert_attributes_value(a, alert_attribute_value); + } + + g_hash_table_destroy(alert_attribute_value); + free_xml(xml_top); +} + +void +attrd_write_attributes(bool all, bool ignore_delay) +{ + GHashTableIter iter; + attribute_t *a = NULL; + + crm_debug("Writing out %s attributes", all? "all" : "changed"); + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { + if (!all && a->unknown_peer_uuids) { + // Try writing this attribute again, in case peer ID was learned + a->changed = true; + } else if (a->force_write) { + /* If the force_write flag is set, write the attribute. */ + a->changed = true; + } + + if(all || a->changed) { + /* When forced write flag is set, ignore delay. */ + attrd_write_attribute(a, (a->force_write ? true : ignore_delay)); + } else { + crm_trace("Skipping unchanged attribute %s", a->id); + } + } +} + +void +attrd_write_or_elect_attribute(attribute_t *a) +{ + if (attrd_election_won()) { + attrd_write_attribute(a, false); + } else { + attrd_start_election_if_needed(); + } +} diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c new file mode 100644 index 0000000..ef205e6 --- /dev/null +++ b/daemons/attrd/attrd_corosync.c @@ -0,0 +1,620 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#include <crm/cluster.h> +#include <crm/cluster/internal.h> +#include <crm/common/logging.h> +#include <crm/common/results.h> +#include <crm/common/strings_internal.h> +#include <crm/msg_xml.h> + +#include "pacemaker-attrd.h" + +extern crm_exit_t attrd_exit_status; + +static xmlNode * +attrd_confirmation(int callid) +{ + xmlNode *node = create_xml_node(NULL, __func__); + + crm_xml_add(node, F_TYPE, T_ATTRD); + crm_xml_add(node, F_ORIG, get_local_node_name()); + crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); + crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); + + return node; +} + +static void +attrd_peer_message(crm_node_t *peer, xmlNode *xml) +{ + const char *election_op = crm_element_value(xml, F_CRM_TASK); + + if (election_op) { + attrd_handle_election_op(peer, xml); + return; + } + + if (attrd_shutting_down()) { + /* If we're shutting down, we want to continue responding to election + * ops as long as we're a cluster member (because our vote may be + * needed). Ignore all other messages. + */ + return; + + } else { + pcmk__request_t request = { + .ipc_client = NULL, + .ipc_id = 0, + .ipc_flags = 0, + .peer = peer->uname, + .xml = xml, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); + CRM_CHECK(request.op != NULL, return); + + attrd_handle_request(&request); + + /* Having finished handling the request, check to see if the originating + * peer requested confirmation. If so, send that confirmation back now. + */ + if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && + !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + int callid = 0; + xmlNode *reply = NULL; + + /* Add the confirmation ID for the message we are confirming to the + * response so the originating peer knows what they're a confirmation + * for. + */ + crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); + reply = attrd_confirmation(callid); + + /* And then send the confirmation back to the originating peer. This + * ends up right back in this same function (attrd_peer_message) on the + * peer where it will have to do something with a PCMK__XA_CONFIRM type + * message. + */ + crm_debug("Sending %s a confirmation", peer->uname); + attrd_send_message(peer, reply, false); + free_xml(reply); + } + + pcmk__reset_request(&request); + } +} + +static void +attrd_cpg_dispatch(cpg_handle_t handle, + const struct cpg_name *groupName, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + uint32_t kind = 0; + xmlNode *xml = NULL; + const char *from = NULL; + char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); + + if(data == NULL) { + return; + } + + if (kind == crm_class_cluster) { + xml = string2xml(data); + } + + if (xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { + crm_node_t *peer = crm_get_peer(nodeid, from); + + attrd_peer_message(peer, xml); + } + + free_xml(xml); + free(data); +} + +static void +attrd_cpg_destroy(gpointer unused) +{ + if (attrd_shutting_down()) { + crm_info("Corosync disconnection complete"); + + } else { + crm_crit("Lost connection to cluster layer, shutting down"); + attrd_exit_status = CRM_EX_DISCONNECT; + attrd_shutdown(0); + } +} + +/*! + * \internal + * \brief Override an attribute sync with a local value + * + * Broadcast the local node's value for an attribute that's different from the + * value provided in a peer's attribute synchronization response. This ensures a + * node's values for itself take precedence and all peers are kept in sync. + * + * \param[in] a Attribute entry to override + * + * \return Local instance of attribute value + */ +static attribute_value_t * +broadcast_local_value(const attribute_t *a) +{ + attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); + xmlNode *sync = create_xml_node(NULL, __func__); + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + attrd_add_value_xml(sync, a, v, false); + attrd_send_message(NULL, sync, false); + free_xml(sync); + return v; +} + +/*! + * \internal + * \brief Ensure a Pacemaker Remote node is in the correct peer cache + * + * \param[in] node_name Name of Pacemaker Remote node to check + */ +static void +cache_remote_node(const char *node_name) +{ + /* If we previously assumed this node was an unseen cluster node, + * remove its entry from the cluster peer cache. + */ + crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); + + if (dup && (dup->uuid == NULL)) { + reap_crm_member(0, node_name); + } + + // Ensure node is in the remote peer cache + CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +} + +#define state_text(state) pcmk__s((state), "in unknown state") + +/*! + * \internal + * \brief Return host's hash table entry (creating one if needed) + * + * \param[in,out] values Hash table of values + * \param[in] host Name of peer to look up + * \param[in] xml XML describing the attribute + * + * \return Pointer to new or existing hash table entry + */ +static attribute_value_t * +attrd_lookup_or_create_value(GHashTable *values, const char *host, + const xmlNode *xml) +{ + attribute_value_t *v = g_hash_table_lookup(values, host); + int is_remote = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); + if (is_remote) { + cache_remote_node(host); + } + + if (v == NULL) { + v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(v != NULL); + + pcmk__str_update(&v->nodename, host); + v->is_remote = is_remote; + g_hash_table_replace(values, v->nodename, v); + } + return(v); +} + +static void +attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) +{ + bool gone = false; + bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); + + switch (kind) { + case crm_status_uname: + crm_debug("%s node %s is now %s", + (is_remote? "Remote" : "Cluster"), + peer->uname, state_text(peer->state)); + break; + + case crm_status_processes: + if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { + gone = true; + } + crm_debug("Node %s is %s a peer", + peer->uname, (gone? "no longer" : "now")); + break; + + case crm_status_nstate: + crm_debug("%s node %s is now %s (was %s)", + (is_remote? "Remote" : "Cluster"), + peer->uname, state_text(peer->state), state_text(data)); + if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { + /* If we're the writer, send new peers a list of all attributes + * (unless it's a remote node, which doesn't run its own attrd) + */ + if (attrd_election_won() + && !pcmk_is_set(peer->flags, crm_remote_node)) { + attrd_peer_sync(peer, NULL); + } + } else { + // Remove all attribute values associated with lost nodes + attrd_peer_remove(peer->uname, false, "loss"); + gone = true; + } + break; + } + + // Remove votes from cluster nodes that leave, in case election in progress + if (gone && !is_remote) { + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); + attrd_do_not_expect_from_peer(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { + cache_remote_node(peer->uname); + } +} + +static void +record_peer_nodeid(attribute_value_t *v, const char *host) +{ + crm_node_t *known_peer = crm_get_peer(v->nodeid, host); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { + attrd_write_attributes(false, false); + } +} + +static void +update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, + bool filter, int is_force_write) +{ + attribute_value_t *v = NULL; + + v = attrd_lookup_or_create_value(a->values, host, xml); + + if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) + && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", + attr, host, v->current, value, peer->uname); + v = broadcast_local_value(a); + + } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? " in " : "", + pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), + pcmk__s(value, "(unset)"), peer->uname, + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + pcmk__str_update(&v->current, value); + a->changed = true; + + if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) + && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { + + if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { + attrd_set_requesting_shutdown(); + + } else { + attrd_clear_requesting_shutdown(); + } + } + + // Write out new value or start dampening timer + if (a->timeout_ms && a->timer) { + crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); + mainloop_timer_start(a->timer); + } else { + attrd_write_or_elect_attribute(a); + } + + } else { + if (is_force_write == 1 && a->timeout_ms && a->timer) { + /* Save forced writing and set change flag. */ + /* The actual attribute is written by Writer after election. */ + crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", + attr, host, peer->uname, value); + a->force_write = TRUE; + } else { + crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); + } + } + + /* Set the seen flag for attribute processing held only in the own node. */ + v->seen = TRUE; + + /* If this is a cluster node whose node ID we are learning, remember it */ + if ((v->nodeid == 0) && (v->is_remote == FALSE) + && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, + (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { + record_peer_nodeid(v, host); + } +} + +static void +attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) +{ + attribute_t *a = NULL; + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + int is_force_write = 0; + + if (attr == NULL) { + crm_warn("Could not update attribute: peer did not specify name"); + return; + } + + crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); + + a = attrd_populate_attribute(xml, attr); + if (a == NULL) { + return; + } + + if (host == NULL) { + // If no host was specified, update all hosts + GHashTableIter vIter; + + crm_debug("Setting %s for all hosts to %s", attr, value); + xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); + g_hash_table_iter_init(&vIter, a->values); + + while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { + update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); + } + + } else { + // Update attribute value for the given host + update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); + } + + /* If this is a message from some attrd instance broadcasting its protocol + * version, check to see if it's a new minimum version. + */ + if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { + attrd_update_minimum_protocol_ver(peer->uname, value); + } +} + +static void +broadcast_unseen_local_values(void) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a = NULL; + attribute_value_t *v = NULL; + xmlNode *sync = NULL; + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, + pcmk__str_casei)) { + if (sync == NULL) { + sync = create_xml_node(NULL, __func__); + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + } + attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); + } + } + } + + if (sync != NULL) { + crm_debug("Broadcasting local-only values"); + attrd_send_message(NULL, sync, false); + free_xml(sync); + } +} + +int +attrd_cluster_connect(void) +{ + attrd_cluster = pcmk_cluster_new(); + + attrd_cluster->destroy = attrd_cpg_destroy; + attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; + attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; + + crm_set_status_callback(&attrd_peer_change_cb); + + if (crm_cluster_connect(attrd_cluster) == FALSE) { + crm_err("Cluster connection failed"); + return -ENOTCONN; + } + return pcmk_ok; +} + +void +attrd_peer_clear_failure(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); + const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); + guint interval_ms = crm_parse_interval_spec(interval_spec); + char *attr = NULL; + GHashTableIter iter; + regex_t regex; + + crm_node_t *peer = crm_get_peer(0, request->peer); + + if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", + pcmk__s(rsc, "all resources")); + return; + } + + crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + + /* Make sure value is not set, so we delete */ + if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { + crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); + } + + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { + if (regexec(®ex, attr, 0, NULL, 0) == 0) { + crm_trace("Matched %s when clearing %s", + attr, pcmk__s(rsc, "all resources")); + crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); + attrd_peer_update(peer, xml, host, false); + } + } + regfree(®ex); +} + +/*! + * \internal + * \brief Load attributes from a peer sync response + * + * \param[in] peer Peer that sent clear request + * \param[in] peer_won Whether peer is the attribute writer + * \param[in,out] xml Request XML + */ +void +attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) +{ + crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", + peer->uname); + + if (peer_won) { + /* Initialize the "seen" flag for all attributes to cleared, so we can + * detect attributes that local node has but the writer doesn't. + */ + attrd_clear_value_seen(); + } + + // Process each attribute update in the sync response + for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { + attrd_peer_update(peer, child, + crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), + true); + } + + if (peer_won) { + /* If any attributes are still not marked as seen, the writer doesn't + * know about them, so send all peers an update with them. + */ + broadcast_unseen_local_values(); + } +} + +/*! + * \internal + * \brief Remove all attributes and optionally peer cache entries for a node + * + * \param[in] host Name of node to purge + * \param[in] uncache If true, remove node from peer caches + * \param[in] source Who requested removal (only used for logging) + */ +void +attrd_peer_remove(const char *host, bool uncache, const char *source) +{ + attribute_t *a = NULL; + GHashTableIter aIter; + + CRM_CHECK(host != NULL, return); + crm_notice("Removing all %s attributes for peer %s", host, source); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + if(g_hash_table_remove(a->values, host)) { + crm_debug("Removed %s[%s] for peer %s", a->id, host, source); + } + } + + if (uncache) { + crm_remote_peer_cache_remove(host); + reap_crm_member(0, host); + } +} + +void +attrd_peer_sync(crm_node_t *peer, xmlNode *xml) +{ + GHashTableIter aIter; + GHashTableIter vIter; + + attribute_t *a = NULL; + attribute_value_t *v = NULL; + xmlNode *sync = create_xml_node(NULL, __func__); + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); + attrd_add_value_xml(sync, a, v, false); + } + } + + crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); + attrd_send_message(peer, sync, false); + free_xml(sync); +} + +void +attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) +{ + bool handle_sync_point = false; + + if (xml_has_children(xml)) { + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + attrd_copy_xml_attributes(xml, child); + attrd_peer_update_one(peer, child, filter); + + if (attrd_request_has_sync_point(child)) { + handle_sync_point = true; + } + } + + } else { + attrd_peer_update_one(peer, xml, filter); + + if (attrd_request_has_sync_point(xml)) { + handle_sync_point = true; + } + } + + /* If the update XML specified that the client wanted to wait for a sync + * point, process that now. + */ + if (handle_sync_point) { + crm_trace("Hit local sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } +} diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c new file mode 100644 index 0000000..3b6b55a --- /dev/null +++ b/daemons/attrd/attrd_elections.c @@ -0,0 +1,179 @@ +/* + * Copyright 2013-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/msg_xml.h> +#include <crm/cluster.h> +#include <crm/cluster/election_internal.h> + +#include "pacemaker-attrd.h" + +static char *peer_writer = NULL; +static election_t *writer = NULL; + +static gboolean +attrd_election_cb(gpointer user_data) +{ + attrd_declare_winner(); + + /* Update the peers after an election */ + attrd_peer_sync(NULL, NULL); + + /* Update the CIB after an election */ + attrd_write_attributes(true, false); + return FALSE; +} + +void +attrd_election_init(void) +{ + writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, + attrd_election_cb); +} + +void +attrd_election_fini(void) +{ + election_fini(writer); +} + +void +attrd_start_election_if_needed(void) +{ + if ((peer_writer == NULL) + && (election_state(writer) != election_in_progress) + && !attrd_shutting_down()) { + + crm_info("Starting an election to determine the writer"); + election_vote(writer); + } +} + +bool +attrd_election_won(void) +{ + return (election_state(writer) == election_won); +} + +void +attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml) +{ + enum election_result rc = 0; + enum election_result previous = election_state(writer); + + crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); + + // Don't become writer if we're shutting down + rc = election_count_vote(writer, xml, !attrd_shutting_down()); + + switch(rc) { + case election_start: + crm_debug("Unsetting writer (was %s) and starting new election", + peer_writer? peer_writer : "unset"); + free(peer_writer); + peer_writer = NULL; + election_vote(writer); + break; + + case election_lost: + /* The election API should really distinguish between "we just lost + * to this peer" and "we already lost previously, and we are + * discarding this vote for some reason", but it doesn't. + * + * In the first case, we want to tentatively set the peer writer to + * this peer, even though another peer may eventually win (which we + * will learn via attrd_check_for_new_writer()), so + * attrd_start_election_if_needed() doesn't start a new election. + * + * Approximate a test for that case as best as possible. + */ + if ((peer_writer == NULL) || (previous != election_lost)) { + pcmk__str_update(&peer_writer, peer->uname); + crm_debug("Election lost, presuming %s is writer for now", + peer_writer); + } + break; + + case election_in_progress: + election_check(writer); + break; + + default: + crm_info("Ignoring election op from %s due to error", peer->uname); + break; + } +} + +bool +attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml) +{ + int peer_state = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state); + if (peer_state == election_won) { + if ((election_state(writer) == election_won) + && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { + crm_notice("Detected another attribute writer (%s), starting new election", + peer->uname); + election_vote(writer); + + } else if (!pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { + crm_notice("Recorded new attribute writer: %s (was %s)", + peer->uname, (peer_writer? peer_writer : "unset")); + pcmk__str_update(&peer_writer, peer->uname); + } + } + return (peer_state == election_won); +} + +void +attrd_declare_winner(void) +{ + crm_notice("Recorded local node as attribute writer (was %s)", + (peer_writer? peer_writer : "unset")); + pcmk__str_update(&peer_writer, attrd_cluster->uname); +} + +void +attrd_remove_voter(const crm_node_t *peer) +{ + election_remove(writer, peer->uname); + if (peer_writer && pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { + free(peer_writer); + peer_writer = NULL; + crm_notice("Lost attribute writer %s", peer->uname); + + /* Clear any election dampening in effect. Otherwise, if the lost writer + * had just won, the election could fizzle out with no new writer. + */ + election_clear_dampening(writer); + + /* If the writer received attribute updates during its shutdown, it will + * not have written them to the CIB. Ensure we get a new writer so they + * are written out. This means that every node that sees the writer + * leave will start a new election, but that's better than losing + * attributes. + */ + attrd_start_election_if_needed(); + + /* If an election is in progress, we need to call election_check(), in case + * this lost peer is the only one that hasn't voted, otherwise the election + * would be pending until it's timed out. + */ + } else if (election_state(writer) == election_in_progress) { + crm_debug("Checking election status upon loss of voter %s", peer->uname); + election_check(writer); + } +} + +void +attrd_xml_add_writer(xmlNode *xml) +{ + crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(writer)); +} diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c new file mode 100644 index 0000000..9d3dfff --- /dev/null +++ b/daemons/attrd/attrd_ipc.c @@ -0,0 +1,628 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/types.h> + +#include <crm/cluster.h> +#include <crm/cluster/internal.h> +#include <crm/msg_xml.h> +#include <crm/common/acl_internal.h> +#include <crm/common/ipc_internal.h> +#include <crm/common/logging.h> +#include <crm/common/results.h> +#include <crm/common/strings_internal.h> +#include <crm/common/util.h> + +#include "pacemaker-attrd.h" + +static qb_ipcs_service_t *ipcs = NULL; + +/*! + * \internal + * \brief Build the XML reply to a client query + * + * param[in] attr Name of requested attribute + * param[in] host Name of requested host (or NULL for all hosts) + * + * \return New XML reply + * \note Caller is responsible for freeing the resulting XML + */ +static xmlNode *build_query_reply(const char *attr, const char *host) +{ + xmlNode *reply = create_xml_node(NULL, __func__); + attribute_t *a; + + if (reply == NULL) { + return NULL; + } + crm_xml_add(reply, F_TYPE, T_ATTRD); + crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY); + crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + + /* If desired attribute exists, add its value(s) to the reply */ + a = g_hash_table_lookup(attributes, attr); + if (a) { + attribute_value_t *v; + xmlNode *host_value; + + crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); + + /* Allow caller to use "localhost" to refer to local node */ + if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { + host = attrd_cluster->uname; + crm_trace("Mapped localhost to %s", host); + } + + /* If a specific node was requested, add its value */ + if (host) { + v = g_hash_table_lookup(a->values, host); + host_value = create_xml_node(reply, XML_CIB_TAG_NODE); + if (host_value == NULL) { + free_xml(reply); + return NULL; + } + pcmk__xe_add_node(host_value, host, 0); + crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, + (v? v->current : NULL)); + + /* Otherwise, add all nodes' values */ + } else { + GHashTableIter iter; + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { + host_value = create_xml_node(reply, XML_CIB_TAG_NODE); + if (host_value == NULL) { + free_xml(reply); + return NULL; + } + pcmk__xe_add_node(host_value, v->nodename, 0); + crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); + } + } + } + return reply; +} + +xmlNode * +attrd_client_clear_failure(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *rsc, *op, *interval_spec; + + if (minimum_protocol_version >= 2) { + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). + */ + attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } + + rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); + op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); + interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); + + /* Map this to an update */ + crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + + /* Add regular expression matching desired attributes */ + + if (rsc) { + char *pattern; + + if (op == NULL) { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); + + } else { + guint interval_ms = crm_parse_interval_spec(interval_spec); + + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, + rsc, op, interval_ms); + } + + crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern); + free(pattern); + + } else { + crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL); + } + + /* Make sure attribute and value are not set, so we delete via regex */ + if (crm_element_value(xml, PCMK__XA_ATTR_NAME)) { + crm_xml_replace(xml, PCMK__XA_ATTR_NAME, NULL); + } + if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { + crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); + } + + return attrd_client_update(request); +} + +xmlNode * +attrd_client_peer_remove(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + + // Host and ID are not used in combination, rather host has precedence + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + char *host_alloc = NULL; + + attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); + + if (host == NULL) { + int nodeid = 0; + + crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); + if (nodeid > 0) { + crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL); + char *host_alloc = NULL; + + if (node && node->uname) { + // Use cached name if available + host = node->uname; + } else { + // Otherwise ask cluster layer + host_alloc = get_node_name(nodeid); + host = host_alloc; + } + pcmk__xe_add_node(xml, host, 0); + } + } + + if (host) { + crm_info("Client %s is requesting all values for %s be removed", + pcmk__client_name(request->ipc_client), host); + attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + free(host_alloc); + } else { + crm_info("Ignoring request by client %s to remove all peer values without specifying peer", + pcmk__client_name(request->ipc_client)); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +xmlNode * +attrd_client_query(pcmk__request_t *request) +{ + xmlNode *query = request->xml; + xmlNode *reply = NULL; + const char *attr = NULL; + + crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); + + /* Request must specify attribute name to query */ + attr = crm_element_value(query, PCMK__XA_ATTR_NAME); + if (attr == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Ignoring malformed query from %s (no attribute name given)", + pcmk__client_name(request->ipc_client)); + return NULL; + } + + /* Build the XML reply */ + reply = build_query_reply(attr, crm_element_value(query, + PCMK__XA_ATTR_NODE_NAME)); + if (reply == NULL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Could not respond to query from %s: could not create XML reply", + pcmk__client_name(request->ipc_client)); + return NULL; + } else { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } + + request->ipc_client->request_id = 0; + return reply; +} + +xmlNode * +attrd_client_refresh(pcmk__request_t *request) +{ + crm_info("Updating all attributes"); + + attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); + attrd_write_attributes(true, true); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +static void +handle_missing_host(xmlNode *xml) +{ + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + + if (host == NULL) { + crm_trace("Inferring host"); + pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid); + } +} + +/* Convert a single IPC message with a regex into one with multiple children, one + * for each regex match. + */ +static int +expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) +{ + if (attr == NULL && regex) { + bool matched = false; + GHashTableIter aIter; + regex_t r_patt; + + crm_debug("Setting %s to %s", regex, value); + if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { + return EINVAL; + } + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { + int status = regexec(&r_patt, attr, 0, NULL, 0); + + if (status == 0) { + xmlNode *child = create_xml_node(xml, XML_ATTR_OP); + + crm_trace("Matched %s with %s", attr, regex); + matched = true; + + /* Copy all the attributes from the parent over, but remove the + * regex and replace it with the name. + */ + attrd_copy_xml_attributes(xml, child); + crm_xml_replace(child, PCMK__XA_ATTR_PATTERN, NULL); + crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); + } + } + + regfree(&r_patt); + + /* Return a code if we never matched anything. This should not be treated + * as an error. It indicates there was a regex, and it was a valid regex, + * but simply did not match anything and the caller should not continue + * doing any regex-related processing. + */ + if (!matched) { + return pcmk_rc_op_unsatisfied; + } + + } else if (attr == NULL) { + return pcmk_rc_bad_nvpair; + } + + return pcmk_rc_ok; +} + +static int +handle_regexes(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + int rc = pcmk_rc_ok; + + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); + + rc = expand_regexes(xml, attr, value, regex); + + if (rc == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Bad regex '%s' for update from client %s", regex, + pcmk__client_name(request->ipc_client)); + + } else if (rc == pcmk_rc_bad_nvpair) { + crm_err("Update request did not specify attribute or regular expression"); + pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "Client %s update request did not specify attribute or regular expression", + pcmk__client_name(request->ipc_client)); + } + + return rc; +} + +static int +handle_value_expansion(const char **value, xmlNode *xml, const char *op, + const char *attr) +{ + attribute_t *a = g_hash_table_lookup(attributes, attr); + + if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { + return EINVAL; + } + + if (*value && attrd_value_needs_expansion(*value)) { + int int_value; + attribute_value_t *v = NULL; + + if (a) { + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); + v = g_hash_table_lookup(a->values, host); + } + + int_value = attrd_expand_value(*value, (v? v->current : NULL)); + + crm_info("Expanded %s=%s to %d", attr, *value, int_value); + crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); + + /* Replacing the value frees the previous memory, so re-query it */ + *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + } + + return pcmk_rc_ok; +} + +static void +send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) +{ + if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { + /* The client is waiting on the cluster-wide sync point. In this case, + * the response ACK is not sent until this attrd broadcasts the update + * and receives its own confirmation back from all peers. + */ + attrd_expect_confirmations(request, attrd_cluster_sync_point_update); + attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ + + } else { + /* The client is either waiting on the local sync point or was not + * waiting on any sync point at all. For the local sync point, the + * response ACK is sent in attrd_peer_update. For clients not + * waiting on any sync point, the response ACK is sent in + * handle_update_request immediately before this function was called. + */ + attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + } +} + +static int +send_child_update(xmlNode *child, void *data) +{ + pcmk__request_t *request = (pcmk__request_t *) data; + + /* Calling pcmk__set_result is handled by one of these calls to + * attrd_client_update, so no need to do it again here. + */ + request->xml = child; + attrd_client_update(request); + return pcmk_rc_ok; +} + +xmlNode * +attrd_client_update(pcmk__request_t *request) +{ + xmlNode *xml = request->xml; + const char *attr, *value, *regex; + + /* If the message has children, that means it is a message from a newer + * client that supports sending multiple operations at a time. There are + * two ways we can handle that. + */ + if (xml_has_children(xml)) { + if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { + /* First, if all peers support a certain protocol version, we can + * just broadcast the big message and they'll handle it. However, + * we also need to apply all the transformations in this function + * to the children since they don't happen anywhere else. + */ + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + attr = crm_element_value(child, PCMK__XA_ATTR_NAME); + value = crm_element_value(child, PCMK__XA_ATTR_VALUE); + + handle_missing_host(child); + + if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, + "Attribute %s does not exist", attr); + return NULL; + } + } + + send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { + /* Save the original xml node pointer so it can be restored after iterating + * over all the children. + */ + xmlNode *orig_xml = request->xml; + + /* Second, if they do not support that protocol version, split it + * up into individual messages and call attrd_client_update on + * each one. + */ + pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request); + request->xml = orig_xml; + } + + return NULL; + } + + attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); + + if (handle_regexes(request) != pcmk_rc_ok) { + /* Error handling was already dealt with in handle_regexes, so just return. */ + return NULL; + } else if (regex) { + /* Recursively call attrd_client_update on the new message with regexes + * expanded. If supported by the attribute daemon, this means that all + * matches can also be handled atomically. + */ + return attrd_client_update(request); + } + + handle_missing_host(xml); + + if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { + pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, + "Attribute %s does not exist", attr); + return NULL; + } + + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? " (writer)" : "")); + + send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +/*! + * \internal + * \brief Accept a new client IPC connection + * + * \param[in,out] c New connection + * \param[in] uid Client user id + * \param[in] gid Client group id + * + * \return pcmk_ok on success, -errno otherwise + */ +static int32_t +attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) +{ + crm_trace("New client connection %p", c); + if (attrd_shutting_down()) { + crm_info("Ignoring new connection from pid %d during shutdown", + pcmk__client_pid(c)); + return -EPERM; + } + + if (pcmk__new_client(c, uid, gid) == NULL) { + return -EIO; + } + return pcmk_ok; +} + +/*! + * \internal + * \brief Destroy a client IPC connection + * + * \param[in] c Connection to destroy + * + * \return FALSE (i.e. do not re-run this callback) + */ +static int32_t +attrd_ipc_closed(qb_ipcs_connection_t *c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + crm_trace("Ignoring request to clean up unknown connection %p", c); + } else { + crm_trace("Cleaning up closed client connection %p", c); + + /* Remove the client from the sync point waitlist if it's present. */ + attrd_remove_client_from_waitlist(client); + + /* And no longer wait for confirmations from any peers. */ + attrd_do_not_wait_for_client(client); + + pcmk__free_client(client); + } + + return FALSE; +} + +/*! + * \internal + * \brief Destroy a client IPC connection + * + * \param[in,out] c Connection to destroy + * + * \note We handle a destroyed connection the same as a closed one, + * but we need a separate handler because the return type is different. + */ +static void +attrd_ipc_destroy(qb_ipcs_connection_t *c) +{ + crm_trace("Destroying client connection %p", c); + attrd_ipc_closed(c); +} + +static int32_t +attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) +{ + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *client = pcmk__find_client(c); + xmlNode *xml = NULL; + + // Sanity-check, and parse XML from IPC data + CRM_CHECK((c != NULL) && (client != NULL), return 0); + if (data == NULL) { + crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); + return 0; + } + + xml = pcmk__client_data2xml(client, data, &id, &flags); + + if (xml == NULL) { + crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); + pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); + return 0; + + } else { + pcmk__request_t request = { + .ipc_client = client, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = xml, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + CRM_ASSERT(client->user != NULL); + pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); + + request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); + CRM_CHECK(request.op != NULL, return 0); + + attrd_handle_request(&request); + pcmk__reset_request(&request); + } + + free_xml(xml); + return 0; +} + +static struct qb_ipcs_service_handlers ipc_callbacks = { + .connection_accept = attrd_ipc_accept, + .connection_created = NULL, + .msg_process = attrd_ipc_dispatch, + .connection_closed = attrd_ipc_closed, + .connection_destroyed = attrd_ipc_destroy +}; + +void +attrd_ipc_fini(void) +{ + if (ipcs != NULL) { + pcmk__drop_all_clients(ipcs); + qb_ipcs_destroy(ipcs); + ipcs = NULL; + } +} + +/*! + * \internal + * \brief Set up attrd IPC communication + */ +void +attrd_init_ipc(void) +{ + pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); +} diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c new file mode 100644 index 0000000..184176a --- /dev/null +++ b/daemons/attrd/attrd_messages.c @@ -0,0 +1,328 @@ +/* + * Copyright 2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> + +#include <crm/common/messages_internal.h> +#include <crm/msg_xml.h> + +#include "pacemaker-attrd.h" + +int minimum_protocol_version = -1; + +static GHashTable *attrd_handlers = NULL; + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + crm_err("Unknown IPC request %s from %s %s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request)); + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown request type '%s' (bug?)", request->op); + return NULL; +} + +static xmlNode * +handle_clear_failure_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + /* It is not currently possible to receive this as a peer command, + * but will be, if we one day enable propagating this operation. + */ + attrd_peer_clear_failure(request); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + if (attrd_request_has_sync_point(request->xml)) { + /* If this client supplied a sync point it wants to wait for, add it to + * the wait list. Clients on this list will not receive an ACK until + * their sync point is hit which will result in the client stalled there + * until it receives a response. + * + * All other clients will receive the expected response as normal. + */ + attrd_add_client_to_waitlist(request); + + } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. + */ + attrd_send_ack(request->ipc_client, request->ipc_id, + request->ipc_flags); + } + + return attrd_client_clear_failure(request); + } +} + +static xmlNode * +handle_confirm_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + int callid; + + crm_debug("Received confirmation from %s", request->peer); + + if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Could not get callid from XML"); + } else { + attrd_handle_confirmation(callid, request->peer); + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_flush_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + /* Ignore. The flush command was removed in 2.0.0 but may be + * received from peers running older versions. + */ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_query_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + return handle_unknown_request(request); + } else { + return attrd_client_query(request); + } +} + +static xmlNode * +handle_remove_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + attrd_peer_remove(host, true, request->peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return attrd_client_peer_remove(request); + } +} + +static xmlNode * +handle_refresh_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + return handle_unknown_request(request); + } else { + return attrd_client_refresh(request); + } +} + +static xmlNode * +handle_sync_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + crm_node_t *peer = crm_get_peer(0, request->peer); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { + return handle_unknown_request(request); + } +} + +static xmlNode * +handle_sync_response_request(pcmk__request_t *request) +{ + if (request->ipc_client != NULL) { + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { + crm_node_t *peer = crm_get_peer(0, request->peer); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { + attrd_peer_sync_response(peer, peer_won, request->xml); + } + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +} + +static xmlNode * +handle_update_request(pcmk__request_t *request) +{ + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + crm_node_t *peer = crm_get_peer(0, request->peer); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + + } else { + if (attrd_request_has_sync_point(request->xml)) { + /* If this client supplied a sync point it wants to wait for, add it to + * the wait list. Clients on this list will not receive an ACK until + * their sync point is hit which will result in the client stalled there + * until it receives a response. + * + * All other clients will receive the expected response as normal. + */ + attrd_add_client_to_waitlist(request); + + } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. + * + * In the normal case, attrd_client_update can be called recursively which + * makes where to send the ACK tricky. Doing it here ensures the client + * only ever receives one. + */ + attrd_send_ack(request->ipc_client, request->ipc_id, + request->flags|crm_ipc_client_response); + } + + return attrd_client_update(request); + } +} + +static void +attrd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, + { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, + { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, + { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, + { PCMK__ATTRD_CMD_QUERY, handle_query_request }, + { PCMK__ATTRD_CMD_REFRESH, handle_refresh_request }, + { PCMK__ATTRD_CMD_SYNC, handle_sync_request }, + { PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request }, + { PCMK__ATTRD_CMD_UPDATE, handle_update_request }, + { PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request }, + { PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request }, + { NULL, handle_unknown_request }, + }; + + attrd_handlers = pcmk__register_handlers(handlers); +} + +void +attrd_unregister_handlers(void) +{ + if (attrd_handlers != NULL) { + g_hash_table_destroy(attrd_handlers); + attrd_handlers = NULL; + } +} + +void +attrd_handle_request(pcmk__request_t *request) +{ + xmlNode *reply = NULL; + char *log_msg = NULL; + const char *reason = NULL; + + if (attrd_handlers == NULL) { + attrd_register_handlers(); + } + + reply = pcmk__process_request(request, attrd_handlers); + + if (reply != NULL) { + crm_log_xml_trace(reply, "Reply"); + + if (request->ipc_client != NULL) { + pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, + request->ipc_flags); + } else { + crm_err("Not sending CPG reply to client"); + } + + free_xml(reply); + } + + reason = request->result.exit_reason; + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request->op, pcmk__request_origin_type(request), + pcmk__request_origin(request), + pcmk_exec_status_str(request->result.execution_status), + (reason == NULL)? "" : " (", + pcmk__s(reason, ""), + (reason == NULL)? "" : ")"); + + if (!pcmk__result_ok(&request->result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(request); +} + +/*! + \internal + \brief Broadcast private attribute for local node with protocol version +*/ +void +attrd_broadcast_protocol(void) +{ + xmlNode *attrd_op = create_xml_node(NULL, __func__); + + crm_xml_add(attrd_op, F_TYPE, T_ATTRD); + crm_xml_add(attrd_op, F_ORIG, crm_system_name); + crm_xml_add(attrd_op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); + crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL); + crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION); + crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); + pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid); + + crm_debug("Broadcasting attrd protocol version %s for node %s", + ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); + + attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ + + free_xml(attrd_op); +} + +gboolean +attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) +{ + const char *op = crm_element_value(data, PCMK__XA_TASK); + + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + + /* Request a confirmation from the destination peer node (which could + * be all if node is NULL) that the message has been received and + * acted upon. + */ + if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); + } + + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); +} diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c new file mode 100644 index 0000000..d59ddd5 --- /dev/null +++ b/daemons/attrd/attrd_sync.c @@ -0,0 +1,577 @@ +/* + * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/msg_xml.h> +#include <crm/common/attrd_internal.h> + +#include "pacemaker-attrd.h" + +/* A hash table storing clients that are waiting on a sync point to be reached. + * The key is waitlist_client - just a plain int. The obvious key would be + * the IPC client's ID, but this is not guaranteed to be unique. A single client + * could be waiting on a sync point for multiple attributes at the same time. + * + * It is not expected that this hash table will ever be especially large. + */ +static GHashTable *waitlist = NULL; +static int waitlist_client = 0; + +struct waitlist_node { + /* What kind of sync point does this node describe? */ + enum attrd_sync_point sync_point; + + /* Information required to construct and send a reply to the client. */ + char *client_id; + uint32_t ipc_id; + uint32_t flags; +}; + +/* A hash table storing information on in-progress IPC requests that are awaiting + * confirmations. These requests are currently being processed by peer attrds and + * we are waiting to receive confirmation messages from each peer indicating that + * processing is complete. + * + * Multiple requests could be waiting on confirmations at the same time. + * + * The key is the unique callid for the IPC request, and the value is a + * confirmation_action struct. + */ +static GHashTable *expected_confirmations = NULL; + +/*! + * \internal + * \brief A structure describing a single IPC request that is awaiting confirmations + */ +struct confirmation_action { + /*! + * \brief A list of peer attrds that we are waiting to receive confirmation + * messages from + * + * This list is dynamic - as confirmations arrive from peer attrds, they will + * be removed from this list. When the list is empty, all peers have processed + * the request and the associated confirmation action will be taken. + */ + GList *respondents; + + /*! + * \brief A timer that will be used to remove the client should it time out + * before receiving all confirmations + */ + mainloop_timer_t *timer; + + /*! + * \brief A function to run when all confirmations have been received + */ + attrd_confirmation_action_fn fn; + + /*! + * \brief Information required to construct and send a reply to the client + */ + char *client_id; + uint32_t ipc_id; + uint32_t flags; + + /*! + * \brief The XML request containing the callid associated with this action + */ + void *xml; +}; + +static void +next_key(void) +{ + do { + waitlist_client++; + if (waitlist_client < 0) { + waitlist_client = 1; + } + } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); +} + +static void +free_waitlist_node(gpointer data) +{ + struct waitlist_node *wl = (struct waitlist_node *) data; + + free(wl->client_id); + free(wl); +} + +static const char * +sync_point_str(enum attrd_sync_point sync_point) +{ + if (sync_point == attrd_sync_point_local) { + return PCMK__VALUE_LOCAL; + } else if (sync_point == attrd_sync_point_cluster) { + return PCMK__VALUE_CLUSTER; + } else { + return "unknown"; + } +} + +/*! + * \internal + * \brief Add a client to the attrd waitlist + * + * Typically, a client receives an ACK for its XML IPC request immediately. However, + * some clients want to wait until their request has been processed and taken effect. + * This is called a sync point. Any client placed on this waitlist will have its + * ACK message delayed until either its requested sync point is hit, or until it + * times out. + * + * The XML IPC request must specify the type of sync point it wants to wait for. + * + * \param[in,out] request The request describing the client to place on the waitlist. + */ +void +attrd_add_client_to_waitlist(pcmk__request_t *request) +{ + const char *sync_point = attrd_request_sync_point(request->xml); + struct waitlist_node *wl = NULL; + + if (sync_point == NULL) { + return; + } + + if (waitlist == NULL) { + waitlist = pcmk__intkey_table(free_waitlist_node); + } + + wl = calloc(sizeof(struct waitlist_node), 1); + + CRM_ASSERT(wl != NULL); + + wl->client_id = strdup(request->ipc_client->id); + + CRM_ASSERT(wl->client_id); + + if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { + wl->sync_point = attrd_sync_point_local; + } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { + wl->sync_point = attrd_sync_point_cluster; + } else { + free_waitlist_node(wl); + return; + } + + wl->ipc_id = request->ipc_id; + wl->flags = request->flags; + + next_key(); + pcmk__intkey_table_insert(waitlist, waitlist_client, wl); + + crm_trace("Added client %s to waitlist for %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + + /* And then add the key to the request XML so we can uniquely identify + * it when it comes time to issue the ACK. + */ + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); +} + +/*! + * \internal + * \brief Free all memory associated with the waitlist. This is most typically + * used when attrd shuts down. + */ +void +attrd_free_waitlist(void) +{ + if (waitlist == NULL) { + return; + } + + g_hash_table_destroy(waitlist); + waitlist = NULL; +} + +/*! + * \internal + * \brief Unconditionally remove a client from the waitlist, such as when the client + * node disconnects from the cluster + * + * \param[in] client The client to remove + */ +void +attrd_remove_client_from_waitlist(pcmk__client_t *client) +{ + GHashTableIter iter; + gpointer value; + + if (waitlist == NULL) { + return; + } + + g_hash_table_iter_init(&iter, waitlist); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct waitlist_node *wl = (struct waitlist_node *) value; + + if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { + g_hash_table_iter_remove(&iter); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } +} + +/*! + * \internal + * \brief Send an IPC ACK message to all awaiting clients + * + * This function will search the waitlist for all clients that are currently awaiting + * an ACK indicating their attrd operation is complete. Only those clients with a + * matching sync point type and callid from their original XML IPC request will be + * ACKed. Once they have received an ACK, they will be removed from the waitlist. + * + * \param[in] sync_point What kind of sync point have we hit? + * \param[in] xml The original XML IPC request. + */ +void +attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) +{ + int callid; + gpointer value; + + if (waitlist == NULL) { + return; + } + + if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + crm_warn("Could not get callid from request XML"); + return; + } + + value = pcmk__intkey_table_lookup(waitlist, callid); + if (value != NULL) { + struct waitlist_node *wl = (struct waitlist_node *) value; + pcmk__client_t *client = NULL; + + if (wl->sync_point != sync_point) { + return; + } + + crm_notice("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + + client = pcmk__find_client_by_id(wl->client_id); + if (client == NULL) { + return; + } + + attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); + + /* And then remove the client so it doesn't get alerted again. */ + pcmk__intkey_table_remove(waitlist, callid); + + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } +} + +/*! + * \internal + * \brief Action to take when a cluster sync point is hit for a + * PCMK__ATTRD_CMD_UPDATE* message. + * + * \param[in] xml The request that should be passed along to + * attrd_ack_waitlist_clients. This should be the original + * IPC request containing the callid for this update message. + */ +int +attrd_cluster_sync_point_update(xmlNode *xml) +{ + crm_trace("Hit cluster sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Return the sync point attribute for an IPC request + * + * This function will check both the top-level element of \p xml for a sync + * point attribute, as well as all of its \p op children, if any. The latter + * is useful for newer versions of attrd that can put multiple IPC requests + * into a single message. + * + * \param[in] xml An XML IPC request + * + * \note It is assumed that if one child element has a sync point attribute, + * all will have a sync point attribute and they will all be the same + * sync point. No other configuration is supported. + * + * \return The sync point attribute of \p xml, or NULL if none. + */ +const char * +attrd_request_sync_point(xmlNode *xml) +{ + if (xml_has_children(xml)) { + xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); + + if (child) { + return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); + } else { + return NULL; + } + + } else { + return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); + } +} + +/*! + * \internal + * \brief Does an IPC request contain any sync point attribute? + * + * \param[in] xml An XML IPC request + * + * \return true if there's a sync point attribute, false otherwise + */ +bool +attrd_request_has_sync_point(xmlNode *xml) +{ + return attrd_request_sync_point(xml) != NULL; +} + +static void +free_action(gpointer data) +{ + struct confirmation_action *action = (struct confirmation_action *) data; + g_list_free_full(action->respondents, free); + mainloop_timer_del(action->timer); + free_xml(action->xml); + free(action->client_id); + free(action); +} + +/* Remove an IPC request from the expected_confirmations table if the peer attrds + * don't respond before the timeout is hit. We set the timeout to 15s. The exact + * number isn't critical - we just want to make sure that the table eventually gets + * cleared of things that didn't complete. + */ +static gboolean +confirmation_timeout_cb(gpointer data) +{ + struct confirmation_action *action = (struct confirmation_action *) data; + + GHashTableIter iter; + gpointer value; + + if (expected_confirmations == NULL) { + return G_SOURCE_REMOVE; + } + + g_hash_table_iter_init(&iter, expected_confirmations); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + if (value == action) { + pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); + if (client == NULL) { + return G_SOURCE_REMOVE; + } + + crm_trace("Timed out waiting for confirmations for client %s", client->id); + pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, + "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); + + g_hash_table_iter_remove(&iter); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + break; + } + } + + return G_SOURCE_REMOVE; +} + +/*! + * \internal + * \brief When a peer disconnects from the cluster, no longer wait for its confirmation + * for any IPC action. If this peer is the last one being waited on, this will + * trigger the confirmation action. + * + * \param[in] host The disconnecting peer attrd's uname + */ +void +attrd_do_not_expect_from_peer(const char *host) +{ + GList *keys = NULL; + + if (expected_confirmations == NULL) { + return; + } + + keys = g_hash_table_get_keys(expected_confirmations); + + crm_trace("Removing peer %s from expected confirmations", host); + + for (GList *node = keys; node != NULL; node = node->next) { + int callid = *(int *) node->data; + attrd_handle_confirmation(callid, host); + } + + g_list_free(keys); +} + +/*! + * \internal + * \brief When a client disconnects from the cluster, no longer wait on confirmations + * for it. Because the peer attrds may still be processing the original IPC + * message, they may still send us confirmations. However, we will take no + * action on them. + * + * \param[in] client The disconnecting client + */ +void +attrd_do_not_wait_for_client(pcmk__client_t *client) +{ + GHashTableIter iter; + gpointer value; + + if (expected_confirmations == NULL) { + return; + } + + g_hash_table_iter_init(&iter, expected_confirmations); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct confirmation_action *action = (struct confirmation_action *) value; + + if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { + crm_trace("Removing client %s from expected confirmations", client->id); + g_hash_table_iter_remove(&iter); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + break; + } + } +} + +/*! + * \internal + * \brief Register some action to be taken when IPC request confirmations are + * received + * + * When this function is called, a list of all peer attrds that support confirming + * requests is generated. As confirmations from these peer attrds are received, + * they are removed from this list. When the list is empty, the registered action + * will be called. + * + * \note This function should always be called before attrd_send_message is called + * to broadcast to the peers to ensure that we know what replies we are + * waiting on. Otherwise, it is possible the peer could finish and confirm + * before we know to expect it. + * + * \param[in] request The request that is awaiting confirmations + * \param[in] fn A function to be run after all confirmations are received + */ +void +attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) +{ + struct confirmation_action *action = NULL; + GHashTableIter iter; + gpointer host, ver; + GList *respondents = NULL; + int callid; + + if (expected_confirmations == NULL) { + expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); + } + + if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { + crm_err("Could not get callid from xml"); + return; + } + + if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { + crm_err("Already waiting on confirmations for call id %d", callid); + return; + } + + g_hash_table_iter_init(&iter, peer_protocol_vers); + while (g_hash_table_iter_next(&iter, &host, &ver)) { + if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { + char *s = strdup((char *) host); + + CRM_ASSERT(s != NULL); + respondents = g_list_prepend(respondents, s); + } + } + + action = calloc(1, sizeof(struct confirmation_action)); + CRM_ASSERT(action != NULL); + + action->respondents = respondents; + action->fn = fn; + action->xml = copy_xml(request->xml); + + action->client_id = strdup(request->ipc_client->id); + CRM_ASSERT(action->client_id != NULL); + + action->ipc_id = request->ipc_id; + action->flags = request->flags; + + action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); + mainloop_timer_start(action->timer); + + pcmk__intkey_table_insert(expected_confirmations, callid, action); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); +} + +void +attrd_free_confirmations(void) +{ + if (expected_confirmations != NULL) { + g_hash_table_destroy(expected_confirmations); + expected_confirmations = NULL; + } +} + +/*! + * \internal + * \brief Process a confirmation message from a peer attrd + * + * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is + * received from a peer attrd. If this is the last confirmation we are waiting + * on for a given operation, the registered action will be called. + * + * \param[in] callid The unique callid for the XML IPC request + * \param[in] host The confirming peer attrd's uname + */ +void +attrd_handle_confirmation(int callid, const char *host) +{ + struct confirmation_action *action = NULL; + GList *node = NULL; + + if (expected_confirmations == NULL) { + return; + } + + action = pcmk__intkey_table_lookup(expected_confirmations, callid); + if (action == NULL) { + return; + } + + node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); + + if (node == NULL) { + return; + } + + action->respondents = g_list_remove(action->respondents, node->data); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); + + if (action->respondents == NULL) { + action->fn(action->xml); + pcmk__intkey_table_remove(expected_confirmations, callid); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); + } +} diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c new file mode 100644 index 0000000..7de8dd9 --- /dev/null +++ b/daemons/attrd/attrd_utils.c @@ -0,0 +1,362 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <glib.h> +#include <regex.h> +#include <sys/types.h> + +#include <crm/crm.h> +#include <crm/common/ipc_internal.h> +#include <crm/common/mainloop.h> +#include <crm/msg_xml.h> + +#include "pacemaker-attrd.h" + +cib_t *the_cib = NULL; + +static bool requesting_shutdown = false; +static bool shutting_down = false; +static GMainLoop *mloop = NULL; + +/* A hash table storing information on the protocol version of each peer attrd. + * The key is the peer's uname, and the value is the protocol version number. + */ +GHashTable *peer_protocol_vers = NULL; + +/*! + * \internal + * \brief Set requesting_shutdown state + */ +void +attrd_set_requesting_shutdown(void) +{ + requesting_shutdown = true; +} + +/*! + * \internal + * \brief Clear requesting_shutdown state + */ +void +attrd_clear_requesting_shutdown(void) +{ + requesting_shutdown = false; +} + +/*! + * \internal + * \brief Check whether we're currently requesting shutdown + * + * \return true if requesting shutdown, false otherwise + */ +bool +attrd_requesting_shutdown(void) +{ + return requesting_shutdown; +} + +/*! + * \internal + * \brief Check whether we're currently shutting down + * + * \return true if shutting down, false otherwise + */ +bool +attrd_shutting_down(void) +{ + return shutting_down; +} + +/*! + * \internal + * \brief Exit (using mainloop or not, as appropriate) + * + * \param[in] nsig Ignored + */ +void +attrd_shutdown(int nsig) +{ + // Tell various functions not to do anthing + shutting_down = true; + + // Don't respond to signals while shutting down + mainloop_destroy_signal(SIGTERM); + mainloop_destroy_signal(SIGCHLD); + mainloop_destroy_signal(SIGPIPE); + mainloop_destroy_signal(SIGUSR1); + mainloop_destroy_signal(SIGUSR2); + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); + attrd_free_confirmations(); + + if (peer_protocol_vers != NULL) { + g_hash_table_destroy(peer_protocol_vers); + peer_protocol_vers = NULL; + } + + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible + * only if we get SIGTERM in brief windows at start-up and shutdown. + */ + crm_exit(CRM_EX_OK); + } else { + g_main_loop_quit(mloop); + g_main_loop_unref(mloop); + } +} + +/*! + * \internal + * \brief Create a main loop for attrd + */ +void +attrd_init_mainloop(void) +{ + mloop = g_main_loop_new(NULL, FALSE); +} + +/*! + * \internal + * \brief Run attrd main loop + */ +void +attrd_run_mainloop(void) +{ + g_main_loop_run(mloop); +} + +void +attrd_cib_disconnect(void) +{ + CRM_CHECK(the_cib != NULL, return); + the_cib->cmds->del_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); + the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); + cib__clean_up_connection(&the_cib); +} + +void +attrd_cib_replaced_cb(const char *event, xmlNode * msg) +{ + int change_section = cib_change_section_nodes | cib_change_section_status | cib_change_section_alerts; + + if (attrd_requesting_shutdown() || attrd_shutting_down()) { + return; + } + + crm_element_value_int(msg, F_CIB_CHANGE_SECTION, &change_section); + + if (attrd_election_won()) { + if (change_section & (cib_change_section_nodes | cib_change_section_status)) { + crm_notice("Updating all attributes after %s event", event); + attrd_write_attributes(true, false); + } + } + + if (change_section & cib_change_section_alerts) { + // Check for changes in alerts + mainloop_set_trigger(attrd_config_read); + } +} + +/* strlen("value") */ +#define plus_plus_len (5) + +/*! + * \internal + * \brief Check whether an attribute value should be expanded + * + * \param[in] value Attribute value to check + * + * \return true if value needs expansion, false otherwise + */ +bool +attrd_value_needs_expansion(const char *value) +{ + return ((strlen(value) >= (plus_plus_len + 2)) + && (value[plus_plus_len] == '+') + && ((value[plus_plus_len + 1] == '+') + || (value[plus_plus_len + 1] == '='))); +} + +/*! + * \internal + * \brief Expand an increment expression into an integer + * + * \param[in] value Attribute increment expression to expand + * \param[in] old_value Previous value of attribute + * + * \return Expanded value + */ +int +attrd_expand_value(const char *value, const char *old_value) +{ + int offset = 1; + int int_value = char2score(old_value); + + if (value[plus_plus_len + 1] != '+') { + const char *offset_s = value + (plus_plus_len + 2); + + offset = char2score(offset_s); + } + int_value += offset; + + if (int_value > INFINITY) { + int_value = INFINITY; + } + return int_value; +} + +/*! + * \internal + * \brief Create regular expression matching failure-related attributes + * + * \param[out] regex Where to store created regular expression + * \param[in] rsc Name of resource to clear (or NULL for all) + * \param[in] op Operation to clear if rsc is specified (or NULL for all) + * \param[in] interval_ms Interval of operation to clear if op is specified + * + * \return pcmk_ok on success, -EINVAL if arguments are invalid + * + * \note The caller is responsible for freeing the result with regfree(). + */ +int +attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, + guint interval_ms) +{ + char *pattern = NULL; + int rc; + + /* Create a pattern that matches desired attributes */ + + if (rsc == NULL) { + pattern = strdup(ATTRD_RE_CLEAR_ALL); + } else if (op == NULL) { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); + } else { + pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); + } + + /* Compile pattern into regular expression */ + crm_trace("Clearing attributes matching %s", pattern); + rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB); + free(pattern); + + return (rc == 0)? pcmk_ok : -EINVAL; +} + +void +attrd_free_attribute_value(gpointer data) +{ + attribute_value_t *v = data; + + free(v->nodename); + free(v->current); + free(v->requested); + free(v); +} + +void +attrd_free_attribute(gpointer data) +{ + attribute_t *a = data; + if(a) { + free(a->id); + free(a->set_id); + free(a->set_type); + free(a->uuid); + free(a->user); + + mainloop_timer_del(a->timer); + g_hash_table_destroy(a->values); + + free(a); + } +} + +/*! + * \internal + * \brief When a peer node leaves the cluster, stop tracking its protocol version. + * + * \param[in] host The peer node's uname to be removed + */ +void +attrd_remove_peer_protocol_ver(const char *host) +{ + if (peer_protocol_vers != NULL) { + g_hash_table_remove(peer_protocol_vers, host); + } +} + +/*! + * \internal + * \brief When a peer node broadcasts a message with its protocol version, keep + * track of that information. + * + * We keep track of each peer's protocol version so we know which peers to + * expect confirmation messages from when handling cluster-wide sync points. + * We additionally keep track of the lowest protocol version supported by all + * peers so we know when we can send IPC messages containing more than one + * request. + * + * \param[in] host The peer node's uname to be tracked + * \param[in] value The peer node's protocol version + */ +void +attrd_update_minimum_protocol_ver(const char *host, const char *value) +{ + int ver; + + if (peer_protocol_vers == NULL) { + peer_protocol_vers = pcmk__strkey_table(free, NULL); + } + + pcmk__scan_min_int(value, &ver, 0); + + if (ver > 0) { + char *host_name = strdup(host); + + /* Record the peer attrd's protocol version. */ + CRM_ASSERT(host_name != NULL); + g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); + + /* If the protocol version is a new minimum, record it as such. */ + if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { + minimum_protocol_version = ver; + crm_trace("Set minimum attrd protocol version to %d", + minimum_protocol_version); + } + } +} + +void +attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest) +{ + /* Copy attributes from the wrapper parent node into the child node. + * We can't just use copy_in_properties because we want to skip any + * attributes that are already set on the child. For instance, if + * we were told to use a specific node, there will already be a node + * attribute on the child. Copying the parent's node attribute over + * could result in the wrong value. + */ + for (xmlAttrPtr a = pcmk__xe_first_attr(src); a != NULL; a = a->next) { + const char *p_name = (const char *) a->name; + const char *p_value = ((a == NULL) || (a->children == NULL)) ? NULL : + (const char *) a->children->content; + + if (crm_element_value(dest, p_name) == NULL) { + crm_xml_add(dest, p_name, p_value); + } + } +} diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c new file mode 100644 index 0000000..037825b --- /dev/null +++ b/daemons/attrd/pacemaker-attrd.c @@ -0,0 +1,358 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> + +#include <crm/crm.h> +#include <crm/cib/internal.h> +#include <crm/msg_xml.h> +#include <crm/pengine/rules.h> +#include <crm/common/cmdline_internal.h> +#include <crm/common/iso8601.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_internal.h> +#include <crm/common/output_internal.h> +#include <crm/common/xml.h> +#include <crm/cluster/internal.h> + +#include <crm/common/attrd_internal.h> +#include "pacemaker-attrd.h" + +#define SUMMARY "daemon for managing Pacemaker node attributes" + +gboolean stand_alone = FALSE; +gchar **log_files = NULL; + +static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, + + { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, + &log_files, "Send logs to the additional named logfile", NULL }, + + { NULL } +}; + +static pcmk__output_t *out = NULL; + +static pcmk__supported_format_t formats[] = { + PCMK__SUPPORTED_FORMAT_NONE, + PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, + { NULL, NULL, NULL } +}; + +lrmd_t *the_lrmd = NULL; +crm_cluster_t *attrd_cluster = NULL; +crm_trigger_t *attrd_config_read = NULL; +crm_exit_t attrd_exit_status = CRM_EX_OK; + +static void +attrd_cib_destroy_cb(gpointer user_data) +{ + cib_t *conn = user_data; + + conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ + + if (attrd_shutting_down()) { + crm_info("Connection disconnection complete"); + + } else { + /* eventually this should trigger a reconnect, not a shutdown */ + crm_crit("Lost connection to the CIB manager, shutting down"); + attrd_exit_status = CRM_EX_DISCONNECT; + attrd_shutdown(0); + } + + return; +} + +static void +attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), + "Cleared transient attributes: %s " + CRM_XS " xpath=%s rc=%d", + pcmk_strerror(rc), (char *) user_data, rc); +} + +#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS + +/*! + * \internal + * \brief Wipe all transient attributes for this node from the CIB + * + * Clear any previous transient node attributes from the CIB. This is + * normally done by the DC's controller when this node leaves the cluster, but + * this handles the case where the node restarted so quickly that the + * cluster layer didn't notice. + * + * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), + * ideally we'd skip this and sync our attributes from the writer. + * However, currently we reject any values for us that the writer has, in + * attrd_peer_update(). + */ +static void +attrd_erase_attrs(void) +{ + int call_id; + char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); + + crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", + xpath); + + call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, + "attrd_erase_cb", attrd_erase_cb, + free); +} + +static int +attrd_cib_connect(int max_retry) +{ + static int attempts = 0; + + int rc = -ENOTCONN; + + the_cib = cib_new(); + if (the_cib == NULL) { + return -ENOTCONN; + } + + do { + if(attempts > 0) { + sleep(attempts); + } + + attempts++; + crm_debug("Connection attempt %d to the CIB manager", attempts); + rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); + + } while(rc != pcmk_ok && attempts < max_retry); + + if (rc != pcmk_ok) { + crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", + pcmk_strerror(rc), rc); + goto cleanup; + } + + crm_debug("Connected to the CIB manager after %d attempts", attempts); + + rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); + if (rc != pcmk_ok) { + crm_err("Could not set disconnection callback"); + goto cleanup; + } + + rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); + if(rc != pcmk_ok) { + crm_err("Could not set CIB notification callback"); + goto cleanup; + } + + rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); + if (rc != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + goto cleanup; + } + + return pcmk_ok; + + cleanup: + cib__clean_up_connection(&the_cib); + return -ENOTCONN; +} + +/*! + * \internal + * \brief Prepare the CIB after cluster is connected + */ +static void +attrd_cib_init(void) +{ + // We have no attribute values in memory, wipe the CIB to match + attrd_erase_attrs(); + + // Set a trigger for reading the CIB (for the alerts section) + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); + + // Always read the CIB at start-up + mainloop_set_trigger(attrd_config_read); +} + +static bool +ipc_already_running(void) +{ + pcmk_ipc_api_t *old_instance = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd); + if (rc != pcmk_rc_ok) { + return false; + } + + rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); + if (rc != pcmk_rc_ok) { + pcmk_free_ipc_api(old_instance); + return false; + } + + pcmk_disconnect_ipc(old_instance); + pcmk_free_ipc_api(old_instance); + return true; +} + +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; +} + +int +main(int argc, char **argv) +{ + int rc = pcmk_rc_ok; + + GError *error = NULL; + bool initialized = false; + + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + + attrd_init_mainloop(); + crm_log_preinit(NULL, argc, argv); + mainloop_add_signal(SIGTERM, attrd_shutdown); + + pcmk__register_formats(output_group, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + attrd_exit_status = CRM_EX_USAGE; + goto done; + } + + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) { + attrd_exit_status = CRM_EX_ERROR; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; + } + + if (args->version) { + out->version(out, false); + goto done; + } + + // Open additional log files + pcmk__add_logfiles(log_files, out); + + crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker node attribute manager%s", + stand_alone ? " in standalone mode" : ""); + + if (ipc_already_running()) { + const char *msg = "pacemaker-attrd is already active, aborting startup"; + + attrd_exit_status = CRM_EX_OK; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg); + crm_err(msg); + goto done; + } + + initialized = true; + + attributes = pcmk__strkey_table(NULL, attrd_free_attribute); + + /* Connect to the CIB before connecting to the cluster or listening for IPC. + * This allows us to assume the CIB is connected whenever we process a + * cluster or IPC message (which also avoids start-up race conditions). + */ + if (!stand_alone) { + if (attrd_cib_connect(30) != pcmk_ok) { + attrd_exit_status = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Could not connect to the CIB"); + goto done; + } + crm_info("CIB connection active"); + } + + if (attrd_cluster_connect() != pcmk_ok) { + attrd_exit_status = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Could not connect to the cluster"); + goto done; + } + crm_info("Cluster connection active"); + + // Initialization that requires the cluster to be connected + attrd_election_init(); + + if (!stand_alone) { + attrd_cib_init(); + } + + /* Set a private attribute for ourselves with the protocol version we + * support. This lets all nodes determine the minimum supported version + * across all nodes. It also ensures that the writer learns our node name, + * so it can send our attributes to the CIB. + */ + attrd_broadcast_protocol(); + + attrd_init_ipc(); + crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); + attrd_run_mainloop(); + + done: + if (initialized) { + crm_info("Shutting down attribute manager"); + + attrd_election_fini(); + attrd_ipc_fini(); + attrd_lrmd_disconnect(); + + if (!stand_alone) { + attrd_cib_disconnect(); + } + + attrd_free_waitlist(); + pcmk_cluster_free(attrd_cluster); + g_hash_table_destroy(attributes); + } + + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + g_strfreev(log_files); + + pcmk__output_and_clear_error(&error, out); + + if (out != NULL) { + out->finish(out, attrd_exit_status, true, NULL); + pcmk__output_free(out); + } + pcmk__unregister_formats(); + crm_exit(attrd_exit_status); +} diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h new file mode 100644 index 0000000..329fb5a --- /dev/null +++ b/daemons/attrd/pacemaker-attrd.h @@ -0,0 +1,216 @@ +/* + * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#ifndef PACEMAKER_ATTRD__H +# define PACEMAKER_ATTRD__H + +#include <regex.h> +#include <glib.h> +#include <crm/crm.h> +#include <crm/cluster.h> +#include <crm/cluster/election_internal.h> +#include <crm/common/messages_internal.h> +#include <crm/cib/internal.h> + +/* + * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used + * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. + * + * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every + * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will + * also set a private attribute for itself with its version, so any attrd can + * determine the minimum version supported by all peers. + * + * Protocol Pacemaker Significant changes + * -------- --------- ------------------- + * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), + * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, + * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, + * PCMK__ATTRD_CMD_SYNC_RESPONSE + * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), + * PCMK__ATTRD_CMD_QUERY + * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, + * PCMK__ATTRD_CMD_UPDATE_DELAY + * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE + * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes + * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message + * 5 2.1.5 Peers can request confirmation of a sent message + */ +#define ATTRD_PROTOCOL_VERSION "5" + +#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) +#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) + +#define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) + +void attrd_init_mainloop(void); +void attrd_run_mainloop(void); + +void attrd_set_requesting_shutdown(void); +void attrd_clear_requesting_shutdown(void); +void attrd_free_waitlist(void); +bool attrd_requesting_shutdown(void); +bool attrd_shutting_down(void); +void attrd_shutdown(int nsig); +void attrd_init_ipc(void); +void attrd_ipc_fini(void); + +void attrd_cib_disconnect(void); + +bool attrd_value_needs_expansion(const char *value); +int attrd_expand_value(const char *value, const char *old_value); + +/* regular expression to clear failures of all resources */ +#define ATTRD_RE_CLEAR_ALL \ + "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" + +/* regular expression to clear failure of all operations for one resource + * (format takes resource name) + * + * @COMPAT attributes set < 1.1.17: + * also match older attributes that do not have the operation part + */ +#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" + +/* regular expression to clear failure of one operation for one resource + * (format takes resource name, operation name, and interval) + * + * @COMPAT attributes set < 1.1.17: + * also match older attributes that do not have the operation part + */ +#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" + +int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, + guint interval_ms); + +extern cib_t *the_cib; + +/* Alerts */ + +extern lrmd_t *the_lrmd; +extern crm_trigger_t *attrd_config_read; + +void attrd_lrmd_disconnect(void); +gboolean attrd_read_options(gpointer user_data); +void attrd_cib_replaced_cb(const char *event, xmlNode * msg); +void attrd_cib_updated_cb(const char *event, xmlNode *msg); +int attrd_send_attribute_alert(const char *node, int nodeid, + const char *attr, const char *value); + +// Elections +void attrd_election_init(void); +void attrd_election_fini(void); +void attrd_start_election_if_needed(void); +bool attrd_election_won(void); +void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); +bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); +void attrd_declare_winner(void); +void attrd_remove_voter(const crm_node_t *peer); +void attrd_xml_add_writer(xmlNode *xml); + +typedef struct attribute_s { + char *uuid; /* TODO: Remove if at all possible */ + char *id; + char *set_id; + char *set_type; + GHashTable *values; + int update; + int timeout_ms; + + /* TODO: refactor these three as a bitmask */ + bool changed; /* whether attribute value has changed since last write */ + bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ + gboolean is_private; /* whether to keep this attribute out of the CIB */ + + mainloop_timer_t *timer; + + char *user; + + gboolean force_write; /* Flag for updating attribute by ignoring delay */ + +} attribute_t; + +typedef struct attribute_value_s { + uint32_t nodeid; + gboolean is_remote; + char *nodename; + char *current; + char *requested; + gboolean seen; +} attribute_value_t; + +extern crm_cluster_t *attrd_cluster; +extern GHashTable *attributes; +extern GHashTable *peer_protocol_vers; + +#define CIB_OP_TIMEOUT_S 120 + +int attrd_cluster_connect(void); +void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter); +void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); +void attrd_peer_remove(const char *host, bool uncache, const char *source); +void attrd_peer_clear_failure(pcmk__request_t *request); +void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, + xmlNode *xml); + +void attrd_broadcast_protocol(void); +xmlNode *attrd_client_peer_remove(pcmk__request_t *request); +xmlNode *attrd_client_clear_failure(pcmk__request_t *request); +xmlNode *attrd_client_update(pcmk__request_t *request); +xmlNode *attrd_client_refresh(pcmk__request_t *request); +xmlNode *attrd_client_query(pcmk__request_t *request); +gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); + +xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); +void attrd_clear_value_seen(void); +void attrd_free_attribute(gpointer data); +void attrd_free_attribute_value(gpointer data); +attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); + +void attrd_write_attribute(attribute_t *a, bool ignore_delay); +void attrd_write_attributes(bool all, bool ignore_delay); +void attrd_write_or_elect_attribute(attribute_t *a); + +extern int minimum_protocol_version; +void attrd_remove_peer_protocol_ver(const char *host); +void attrd_update_minimum_protocol_ver(const char *host, const char *value); + +mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); + +void attrd_unregister_handlers(void); +void attrd_handle_request(pcmk__request_t *request); + +enum attrd_sync_point { + attrd_sync_point_local, + attrd_sync_point_cluster, +}; + +typedef int (*attrd_confirmation_action_fn)(xmlNode *); + +void attrd_add_client_to_waitlist(pcmk__request_t *request); +void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); +int attrd_cluster_sync_point_update(xmlNode *xml); +void attrd_do_not_expect_from_peer(const char *host); +void attrd_do_not_wait_for_client(pcmk__client_t *client); +void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); +void attrd_free_confirmations(void); +void attrd_handle_confirmation(int callid, const char *host); +void attrd_remove_client_from_waitlist(pcmk__client_t *client); +const char *attrd_request_sync_point(xmlNode *xml); +bool attrd_request_has_sync_point(xmlNode *xml); + +void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest); + +extern gboolean stand_alone; + +#endif /* PACEMAKER_ATTRD__H */ |