summary | refs | log | tree | commit | diff | stats
path: root/daemons/controld/controld_join_dc.c
diff options
context:
space:
mode:
Diffstat (limited to 'daemons/controld/controld_join_dc.c')
-rw-r--r-- daemons/controld/controld_join_dc.c 987
1 files changed, 987 insertions, 0 deletions
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
new file mode 100644
index 0000000..f82b132
--- /dev/null
+++ b/daemons/controld/controld_join_dc.c
@@ -0,0 +1,987 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/cluster.h>
+
+#include <pacemaker-controld.h>
+
+static char *max_generation_from = NULL; // name of the node that supplied max_generation_xml
+static xmlNodePtr max_generation_xml = NULL; // best CIB generation accepted so far in the current join round
+
+/*!
+ * \internal
+ * \brief Nodes from which a CIB sync has failed since the peer joined
+ *
+ * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
+ * the name of a client node from which a CIB \p sync_from() call has failed in
+ * \p do_dc_join_finalize() since the client joined the cluster as a peer.
+ * \p join_id is the ID of the join round in which the \p sync_from() failed,
+ * and is intended for use in nack log messages.
+ *
+ * The table is created lazily by \p record_failed_sync_node() and stays NULL
+ * until the first failure is recorded.
+ */
+static GHashTable *failed_sync_nodes = NULL;
+
+void finalize_join_for(gpointer key, gpointer value, gpointer user_data); // g_hash_table_foreach() callback over crm_peer_cache
+void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); // CIB callback for the sync_from() call
+gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); // defined near the end of this file
+
+/* Numeric counter used to identify join rounds (an unsigned int would be
+ * appropriate, except we get and set it in XML as int). It is incremented by
+ * do_dc_join_offer_all() each time a new join round is started.
+ */
+static int current_join_id = 0;
+
+/*!
+ * \internal
+ * \brief Free the table of nodes whose CIB sync failed, if it was created
+ *
+ * The table pointer is reset to NULL afterward, so repeated calls are harmless.
+ */
+void
+controld_destroy_failed_sync_table(void)
+{
+    if (failed_sync_nodes == NULL) {
+        return;
+    }
+
+    g_hash_table_destroy(failed_sync_nodes);
+    failed_sync_nodes = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Forget any recorded CIB sync failure for a node
+ *
+ * \param[in] node_name  Name of the node to drop from the failed sync table
+ *
+ * \note This does nothing if the table has not been created.
+ */
+void
+controld_remove_failed_sync_node(const char *node_name)
+{
+    if (failed_sync_nodes == NULL) {
+        return;
+    }
+    g_hash_table_remove(failed_sync_nodes, node_name);
+}
+
+/*!
+ * \internal
+ * \brief Remember that syncing the CIB from a node failed
+ *
+ * The failed sync table is created on first use. Each key is a copy of the
+ * node name (the table is given g_free() to release keys); each value is the
+ * join ID packed directly into the pointer with GINT_TO_POINTER().
+ *
+ * \param[in] node_name  Name of node whose CIB failed to sync
+ * \param[in] join_id    Join round in which the failure occurred
+ */
+static void
+record_failed_sync_node(const char *node_name, gint join_id)
+{
+    if (!failed_sync_nodes) {
+        failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
+    }
+
+    /* A node that is already present means we failed to nack it during the
+     * filter offer step, so log an assertion failure if the insertion reports
+     * that the key already existed
+     */
+    CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
+                                       GINT_TO_POINTER(join_id)));
+}
+
+/*!
+ * \internal
+ * \brief Look up a node name in the failed sync table
+ *
+ * \param[in]  node_name  Name of node to look up
+ * \param[out] join_id    Where to store the join ID of when the sync failed
+ *
+ * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
+ *         node name was found, or \p pcmk_rc_node_unknown otherwise.
+ * \note \p *join_id is set to -1 if the node is not found.
+ * \note The join ID is stored in the table as a pointer-packed integer, so a
+ *       plain g_hash_table_lookup() could not tell a stored ID of 0 apart
+ *       from a missing entry. g_hash_table_lookup_extended() reports presence
+ *       separately from the value.
+ */
+static int
+lookup_failed_sync_node(const char *node_name, gint *join_id)
+{
+    gpointer stored_id = NULL;
+
+    *join_id = -1;
+
+    if (failed_sync_nodes == NULL) {
+        return pcmk_rc_node_unknown;
+    }
+
+    if (!g_hash_table_lookup_extended(failed_sync_nodes, node_name, NULL,
+                                      &stored_id)) {
+        return pcmk_rc_node_unknown;
+    }
+
+    *join_id = GPOINTER_TO_INT(stored_id);
+    return pcmk_rc_ok;
+}
+
+/*!
+ * \brief Move a cluster node to a new join phase, if the transition is allowed
+ *
+ * A phase change is accepted when the new phase is crm_join_none or lower, or
+ * when it is exactly one step after the node's current phase. Any other
+ * change is rejected with a warning. Remote nodes are ignored.
+ *
+ * \param[in]     source  Caller name, for logging only
+ * \param[in,out] node    Node whose join phase should be updated
+ * \param[in]     phase   Requested join phase
+ */
+void
+crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
+{
+    enum crm_join_phase previous = 0;
+
+    CRM_CHECK(node != NULL, return);
+
+    // Remote nodes do not participate in joins
+    if (pcmk_is_set(node->flags, crm_remote_node)) {
+        return;
+    }
+
+    previous = node->join;
+
+    if (phase == previous) {
+        crm_trace("Node %s join-%d phase is still %s "
+                  CRM_XS " nodeid=%u source=%s",
+                  node->uname, current_join_id, crm_join_phase_str(previous),
+                  node->id, source);
+        return;
+    }
+
+    if ((phase > crm_join_none) && (phase != (previous + 1))) {
+        // Forward progress is only allowed one phase at a time
+        crm_warn("Rejecting join-%d phase update for node %s because "
+                 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
+                 current_join_id, node->uname, crm_join_phase_str(previous),
+                 crm_join_phase_str(phase), node->id, source);
+        return;
+    }
+
+    node->join = phase;
+    crm_trace("Node %s join-%d phase is now %s (was %s) "
+              CRM_XS " nodeid=%u source=%s",
+              node->uname, current_join_id, crm_join_phase_str(phase),
+              crm_join_phase_str(previous), node->id, source);
+}
+
+/*!
+ * \internal
+ * \brief Reset per-round join state at the start of a new join round
+ *
+ * Every node in the peer cache is put back to crm_join_none, the remembered
+ * best CIB generation and its source are discarded, R_HAVE_CIB is cleared,
+ * and all recorded CIB replace calls are forgotten.
+ */
+static void
+start_join_round(void)
+{
+    GHashTableIter iter;
+    gpointer entry = NULL;
+
+    crm_debug("Starting new join round join-%d", current_join_id);
+
+    g_hash_table_iter_init(&iter, crm_peer_cache);
+    while (g_hash_table_iter_next(&iter, NULL, &entry)) {
+        crm_update_peer_join(__func__, (crm_node_t *) entry, crm_join_none);
+    }
+
+    free(max_generation_from);
+    max_generation_from = NULL;
+
+    free_xml(max_generation_xml);
+    max_generation_xml = NULL;
+
+    controld_clear_fsa_input_flags(R_HAVE_CIB);
+    controld_forget_all_cib_replace_calls();
+}
+
+/*!
+ * \internal
+ * \brief Create a join message from the DC
+ *
+ * \param[in] join_op  Join operation name
+ * \param[in] host_to  Recipient of message
+ *
+ * \return Newly created request, tagged with the current join ID and with
+ *         whether the DC is shutting down
+ * \note Callers in this file release the result with free_xml().
+ */
+static xmlNode *
+create_dc_message(const char *join_op, const char *host_to)
+{
+    const bool dc_leaving = pcmk_is_set(controld_globals.fsa_input_register,
+                                        R_SHUTDOWN);
+    xmlNode *request = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
+                                      CRM_SYSTEM_DC, NULL);
+
+    // Identify which join round this message belongs to
+    crm_xml_add_int(request, F_CRM_JOIN_ID, current_join_id);
+
+    /* Tell the recipient whether the DC is shutting down. This keeps the
+     * joining node from fencing the old DC if it becomes the new DC.
+     */
+    pcmk__xe_set_bool_attr(request, F_CRM_DC_LEAVING, dc_leaving);
+
+    return request;
+}
+
+/*!
+ * \internal
+ * \brief Send a join offer to one cluster node (usable as a GHFunc)
+ *
+ * \param[in]     key        Ignored
+ * \param[in,out] value      Node (crm_node_t *) to make the offer to
+ * \param[in]     user_data  If non-NULL, skip nodes whose join phase is
+ *                           already past crm_join_none
+ */
+static void
+join_make_offer(gpointer key, gpointer value, gpointer user_data)
+{
+    crm_node_t *member = (crm_node_t *) value;
+    xmlNode *offer = NULL;
+
+    CRM_ASSERT(member != NULL);
+
+    if (!crm_is_peer_active(member)) {
+        const char *name = member->uname;
+
+        if (name == NULL) {
+            name = "with unknown name";
+        }
+        crm_info("Not making join-%d offer to inactive node %s",
+                 current_join_id, name);
+
+        if ((member->expected == NULL)
+            && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
+            /* Per the original author: this looks unsafe, but marking the
+             * node as expected-down, combined with an active resource, is
+             * what gets it fenced. The acknowledged downside is that a node
+             * that dies together with the old DC while running no resources
+             * will not be fenced.
+             */
+            pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
+        }
+        return;
+    }
+
+    if (member->uname == NULL) {
+        crm_info("Not making join-%d offer to node uuid %s with unknown name",
+                 current_join_id, member->uuid);
+        return;
+    }
+
+    // Remember which membership event these offers are based on
+    if (controld_globals.membership_id != crm_peer_seq) {
+        controld_globals.membership_id = crm_peer_seq;
+        crm_info("Making join-%d offers based on membership event %llu",
+                 current_join_id, crm_peer_seq);
+    }
+
+    if ((user_data != NULL) && (member->join > crm_join_none)) {
+        crm_info("Not making join-%d offer to already known node %s (%s)",
+                 current_join_id, member->uname,
+                 crm_join_phase_str(member->join));
+        return;
+    }
+
+    crm_update_peer_join(__func__, member, crm_join_none);
+
+    offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
+
+    // Advertise our feature set so the joining node can bail if not compatible
+    crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+
+    crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
+    send_cluster_message(member, crm_msg_crmd, offer, TRUE);
+    free_xml(offer);
+
+    crm_update_peer_join(__func__, member, crm_join_welcomed);
+}
+
+/*!
+ * \brief Start a new join round and offer membership to every cached peer
+ *        (A_DC_JOIN_OFFER_ALL)
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Compared against C_HA_MESSAGE, for logging only
+ * \param[in] cur_state      Not referenced directly in this function
+ * \param[in] current_input  Compared against I_NODE_JOIN, for logging only
+ * \param[in] msg_data       Not referenced directly in this function
+ */
+void
+do_dc_join_offer_all(long long action,
+                     enum crmd_fsa_cause cause,
+                     enum crmd_fsa_state cur_state,
+                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    int outstanding = 0;
+
+    /* Reset everyone's status back to down or in_ccm in the CIB.
+     * Any nodes that are active in the CIB but not in the cluster membership
+     * will be seen as offline by the scheduler anyway.
+     */
+    current_join_id += 1;
+    start_join_round();
+
+    update_dc(NULL);
+    if ((current_input == I_NODE_JOIN) && (cause == C_HA_MESSAGE)) {
+        crm_info("A new node joined the cluster");
+    }
+    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
+
+    outstanding = crmd_join_phase_count(crm_join_welcomed);
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
+             current_join_id, outstanding, pcmk__plural_s(outstanding));
+
+    // Don't waste time by invoking the scheduler yet
+}
+
+/*!
+ * \brief Make a join offer to a single node that announced itself
+ *        (A_DC_JOIN_OFFER_ONE)
+ *
+ * If no message data is attached, offers are made instead to every cached
+ * peer that is not already past crm_join_none.
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Not referenced directly in this function
+ * \param[in] cur_state      State passed on to check_join_state()
+ * \param[in] current_input  Not referenced directly in this function
+ * \param[in] msg_data       FSA data; \p msg_data->data is checked for NULL
+ *                           (presumably also read by fsa_typed_data())
+ */
+void
+do_dc_join_offer_one(long long action,
+                     enum crmd_fsa_cause cause,
+                     enum crmd_fsa_state cur_state,
+                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    crm_node_t *member = NULL;
+    ha_msg_input_t *welcome = NULL;
+    const char *join_to = NULL;
+    int outstanding = 0;
+
+    if (msg_data->data == NULL) {
+        crm_info("Making join-%d offers to any unconfirmed nodes "
+                 "because an unknown node joined", current_join_id);
+
+        /* The user data only has to be non-NULL: join_make_offer() treats it
+         * as a flag meaning "skip nodes that are already known"
+         */
+        g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
+        check_join_state(cur_state, __func__);
+        return;
+    }
+
+    welcome = fsa_typed_data(fsa_dt_ha_msg);
+    if (welcome == NULL) {
+        // fsa_typed_data() already logged an error
+        return;
+    }
+
+    join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
+    if (join_to == NULL) {
+        crm_err("Can't make join-%d offer to unknown node", current_join_id);
+        return;
+    }
+
+    /* It is possible that a node will have been sick or starting up when the
+     * original offer was made. However, it will either re-announce itself in
+     * due course, or we can re-store the original offer on the client.
+     */
+    member = crm_get_peer(0, join_to);
+    crm_update_peer_join(__func__, member, crm_join_none);
+    join_make_offer(NULL, member, NULL);
+
+    /* If the offer isn't to the local node, make an offer to the local node as
+     * well, to ensure the correct value for max_generation_from.
+     */
+    if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
+        member = crm_get_peer(0, controld_globals.our_nodename);
+        join_make_offer(NULL, member, NULL);
+    }
+
+    // This was a genuine join request, so cancel any existing transition
+    abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
+
+    outstanding = crmd_join_phase_count(crm_join_welcomed);
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
+             current_join_id, outstanding, pcmk__plural_s(outstanding));
+}
+
+/*!
+ * \internal
+ * \brief Compare the same integer attribute of two XML elements
+ *
+ * Each attribute value is parsed with pcmk__scan_ll(), which is given -1 as
+ * its default value.
+ *
+ * \param[in] left   First element
+ * \param[in] right  Second element
+ * \param[in] field  Name of the attribute to compare
+ *
+ * \return -1 if \p left's value is lower, 1 if it is higher, 0 if equal
+ */
+static int
+compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
+{
+    long long lhs = 0;
+    long long rhs = 0;
+
+    pcmk__scan_ll(crm_element_value(left, field), &lhs, -1LL);
+    pcmk__scan_ll(crm_element_value(right, field), &rhs, -1LL);
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/*!
+ * \brief Process a node's join request (A_DC_JOIN_PROCESS_REQ)
+ *
+ * Decide whether to accept (integrate) or reject (nack) the requesting node.
+ * While doing so, track which node has supplied the best CIB generation seen
+ * so far in this join round (max_generation_xml / max_generation_from).
+ *
+ * A request is rejected when a CIB sync from the node previously failed, the
+ * node is not an active peer, the request carries no CIB generation, the
+ * node's feature set is missing or incompatible, or the node would supply the
+ * best CIB but its schema version is unknown.
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Not referenced directly in this function
+ * \param[in] cur_state      State passed on to check_join_state()
+ * \param[in] current_input  Not referenced directly in this function
+ * \param[in] msg_data       FSA data (presumably read by fsa_typed_data())
+ */
+void
+do_dc_join_filter_offer(long long action,
+                        enum crmd_fsa_cause cause,
+                        enum crmd_fsa_state cur_state,
+                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    xmlNode *generation = NULL;
+
+    int cmp = 0;
+    int join_id = -1;
+    int count = 0;
+    gint value = 0;
+    gboolean ack_nack_bool = TRUE;
+    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+
+    const char *join_from = NULL;
+    const char *ref = NULL;
+    const char *join_version = NULL;
+    crm_node_t *join_node = NULL;
+
+    if (join_ack == NULL) {
+        // fsa_typed_data() already logged an error
+        return;
+    }
+
+    join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+    ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
+    join_version = crm_element_value(join_ack->msg, XML_ATTR_CRM_VERSION);
+
+    if (join_from == NULL) {
+        crm_err("Ignoring invalid join request without node name");
+        return;
+    }
+    join_node = crm_get_peer(0, join_from);
+
+    // join_id stays -1 if the message has no usable join ID
+    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
+    if (join_id != current_join_id) {
+        crm_debug("Ignoring join-%d request from %s because we are on join-%d",
+                  join_id, join_from, current_join_id);
+        check_join_state(cur_state, __func__);
+        return;
+    }
+
+    /* Compare the requester's CIB generation against the best seen so far,
+     * one attribute at a time, stopping at the first difference. A negative
+     * result means the requester's CIB is newer.
+     */
+    generation = join_ack->xml;
+    if (max_generation_xml != NULL && generation != NULL) {
+        int lpc = 0;
+
+        const char *attributes[] = {
+            XML_ATTR_GENERATION_ADMIN,
+            XML_ATTR_GENERATION,
+            XML_ATTR_NUMUPDATES,
+        };
+
+        for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
+            cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
+        }
+    }
+
+    if (ref == NULL) {
+        ref = "none"; // for logging only
+    }
+
+    if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
+        crm_err("Rejecting join-%d request from node %s because we failed to "
+                "sync its CIB in join-%d " CRM_XS " ref=%s",
+                join_id, join_from, value, ref);
+        ack_nack_bool = FALSE;
+
+    } else if (!crm_is_peer_active(join_node)) {
+        if (match_down_event(join_from) != NULL) {
+            /* The join request was received after the node was fenced or
+             * otherwise shutdown in a way that we're aware of. No need to log
+             * an error in this rare occurrence; we know the client was recently
+             * shut down, and receiving a lingering in-flight request is not
+             * cause for alarm.
+             */
+            crm_debug("Rejecting join-%d request from inactive node %s "
+                      CRM_XS " ref=%s", join_id, join_from, ref);
+        } else {
+            crm_err("Rejecting join-%d request from inactive node %s "
+                    CRM_XS " ref=%s", join_id, join_from, ref);
+        }
+        ack_nack_bool = FALSE;
+
+    } else if (generation == NULL) {
+        crm_err("Rejecting invalid join-%d request from node %s "
+                "missing CIB generation " CRM_XS " ref=%s",
+                join_id, join_from, ref);
+        ack_nack_bool = FALSE;
+
+    } else if ((join_version == NULL)
+               || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
+        crm_err("Rejecting join-%d request from node %s because feature set %s"
+                " is incompatible with ours (%s) " CRM_XS " ref=%s",
+                join_id, join_from, (join_version? join_version : "pre-3.1.0"),
+                CRM_FEATURE_SET, ref);
+        ack_nack_bool = FALSE;
+
+    } else if (max_generation_xml == NULL) {
+        // First generation seen this round: it becomes the best by default
+        const char *validation = crm_element_value(generation,
+                                                   XML_ATTR_VALIDATION);
+
+        if (get_schema_version(validation) < 0) {
+            // validation may be NULL here, so never pass it to %s directly
+            crm_err("Rejecting join-%d request from %s (with first CIB "
+                    "generation) due to unknown schema version %s "
+                    CRM_XS " ref=%s",
+                    join_id, join_from,
+                    (validation? validation : "(missing)"), ref);
+            ack_nack_bool = FALSE;
+
+        } else {
+            crm_debug("Accepting join-%d request from %s (with first CIB "
+                      "generation) " CRM_XS " ref=%s",
+                      join_id, join_from, ref);
+            max_generation_xml = copy_xml(generation);
+            pcmk__str_update(&max_generation_from, join_from);
+        }
+
+    } else if ((cmp < 0)
+               || ((cmp == 0)
+                   && pcmk__str_eq(join_from, controld_globals.our_nodename,
+                                   pcmk__str_casei))) {
+        // Newer CIB than the current best, or a tie that the local node wins
+        const char *validation = crm_element_value(generation,
+                                                   XML_ATTR_VALIDATION);
+
+        if (get_schema_version(validation) < 0) {
+            // validation may be NULL here, so never pass it to %s directly
+            crm_err("Rejecting join-%d request from %s (with better CIB "
+                    "generation than current best from %s) due to unknown "
+                    "schema version %s " CRM_XS " ref=%s",
+                    join_id, join_from, max_generation_from,
+                    (validation? validation : "(missing)"), ref);
+            ack_nack_bool = FALSE;
+
+        } else {
+            crm_debug("Accepting join-%d request from %s (with better CIB "
+                      "generation than current best from %s) " CRM_XS " ref=%s",
+                      join_id, join_from, max_generation_from, ref);
+            crm_log_xml_debug(max_generation_xml, "Old max generation");
+            crm_log_xml_debug(generation, "New max generation");
+
+            free_xml(max_generation_xml);
+            max_generation_xml = copy_xml(generation);
+            pcmk__str_update(&max_generation_from, join_from);
+        }
+
+    } else {
+        crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
+                  join_id, join_from, ref);
+    }
+
+    if (!ack_nack_bool) {
+        if (compare_version(join_version, "3.17.0") < 0) {
+            /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
+             * after a nack message, don't send one
+             */
+            crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
+        } else {
+            crm_update_peer_join(__func__, join_node, crm_join_nack);
+        }
+        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
+
+    } else {
+        crm_update_peer_join(__func__, join_node, crm_join_integrated);
+        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+    }
+
+    count = crmd_join_phase_count(crm_join_integrated);
+    crm_debug("%d node%s currently integrated in join-%d",
+              count, pcmk__plural_s(count), join_id);
+
+    if (check_join_state(cur_state, __func__) == FALSE) {
+        // Don't waste time by invoking the scheduler yet
+        count = crmd_join_phase_count(crm_join_welcomed);
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s",
+                  join_id, count, pcmk__plural_s(count));
+    }
+}
+
+/*!
+ * \brief Begin finalizing the join round by syncing the best CIB
+ *        (A_DC_JOIN_FINALIZE)
+ *
+ * Nothing is done while nodes are still in the welcomed phase, or when no
+ * node is integrated or nacked. Otherwise a CIB sync is requested, either
+ * from the local node or from the node that offered the best generation, and
+ * finalize_sync_callback() is registered for the result.
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Not referenced directly in this function
+ * \param[in] cur_state      Not referenced directly in this function
+ * \param[in] current_input  Not referenced directly in this function
+ * \param[in] msg_data       Not referenced directly in this function
+ *
+ * \note NOTE(review): crmd_fsa_stall() is a macro that may reference the
+ *       parameters by name — confirm before renaming any of them.
+ */
+void
+do_dc_join_finalize(long long action,
+                    enum crmd_fsa_cause cause,
+                    enum crmd_fsa_state cur_state,
+                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    char *sync_from = NULL;
+    int rc = pcmk_ok;
+    int pending = crmd_join_phase_count(crm_join_welcomed);
+    int finalizable = crmd_join_phase_count(crm_join_integrated)
+                      + crmd_join_phase_count(crm_join_nack)
+                      + crmd_join_phase_count(crm_join_nack_quiet);
+
+    /* This we can do straight away and avoid clients timing us out
+     * while we compute the latest CIB
+     */
+    if (pending != 0) {
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s "
+                  "before finalizing join", current_join_id, pending,
+                  pcmk__plural_s(pending));
+        crmd_join_phase_log(LOG_DEBUG);
+        return;
+    }
+
+    if (finalizable == 0) {
+        crm_debug("Finalization not needed for join-%d at the current time",
+                  current_join_id);
+        crmd_join_phase_log(LOG_DEBUG);
+        check_join_state(controld_globals.fsa_state, __func__);
+        return;
+    }
+
+    /* We hold the best CIB only if the best generation came from the local
+     * node (or if no best generation has been recorded at all)
+     */
+    controld_clear_fsa_input_flags(R_HAVE_CIB);
+    if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
+                     pcmk__str_null_matches|pcmk__str_casei)) {
+        controld_set_fsa_input_flags(R_HAVE_CIB);
+    }
+
+    if (!controld_globals.transition_graph->complete) {
+        crm_warn("Delaying join-%d finalization while transition in progress",
+                 current_join_id);
+        crmd_join_phase_log(LOG_DEBUG);
+        crmd_fsa_stall(FALSE);
+        return;
+    }
+
+    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+        // Ask for the agreed best CIB
+        pcmk__str_update(&sync_from, max_generation_from);
+        crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
+                   current_join_id, finalizable,
+                   pcmk__plural_s(finalizable), sync_from);
+        crm_log_xml_notice(max_generation_xml, "Requested CIB version");
+
+    } else {
+        // Send our CIB out to everyone
+        pcmk__str_update(&sync_from, controld_globals.our_nodename);
+        crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
+                  current_join_id, finalizable,
+                  pcmk__plural_s(finalizable));
+        crm_log_xml_debug(max_generation_xml, "Requested CIB version");
+    }
+    crmd_join_phase_log(LOG_DEBUG);
+
+    rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
+                                                    sync_from, NULL, cib_none);
+
+    // The flag is re-read here rather than cached from before the call
+    if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+        controld_record_cib_replace_call(rc);
+    }
+    fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
+}
+
+/*!
+ * \brief Discard the remembered best CIB generation and the name of its source
+ *
+ * Both file-scope pointers are reset to NULL after being freed.
+ */
+void
+free_max_generation(void)
+{
+    free_xml(max_generation_xml);
+    max_generation_xml = NULL;
+
+    free(max_generation_from);
+    max_generation_from = NULL;
+}
+
+/*!
+ * \brief Handle the result of the CIB sync requested by do_dc_join_finalize()
+ *
+ * On failure, the sync source is recorded as a failed sync node (unless the
+ * failure was only due to old data) and the join process is restarted. On
+ * success, if we are still DC and still in S_FINALIZE_JOIN, peers are
+ * notified of the join results.
+ *
+ * \param[in] msg        Not referenced directly in this function
+ * \param[in] call_id    ID of the CIB call, forgotten as a replace call
+ * \param[in] rc         Result of the CIB call
+ * \param[in] output     Not referenced directly in this function
+ * \param[in] user_data  Name of the node the CIB was synced from
+ */
+void
+finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+    int finalizable = 0;
+
+    CRM_LOG_ASSERT(-EPERM != rc);
+
+    controld_forget_cib_replace_call(call_id);
+
+    if (rc != pcmk_ok) {
+        const char *sync_from = (const char *) user_data;
+        const bool old_data = (rc == -pcmk_err_old_data);
+
+        do_crm_log((old_data? LOG_WARNING : LOG_ERR),
+                   "Could not sync CIB from %s in join-%d: %s",
+                   sync_from, current_join_id, pcmk_strerror(rc));
+
+        if (!old_data) {
+            record_failed_sync_node(sync_from, current_join_id);
+        }
+
+        // Restart the whole join process
+        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
+                               __func__);
+        return;
+    }
+
+    if (!AM_I_DC) {
+        crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
+        return;
+    }
+
+    if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
+        crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
+                  "(%s)", current_join_id,
+                  fsa_state2string(controld_globals.fsa_state));
+        return;
+    }
+
+    controld_set_fsa_input_flags(R_HAVE_CIB);
+
+    if (check_join_state(controld_globals.fsa_state, __func__)) {
+        return;
+    }
+
+    // Tell every integrated or nacked peer how its join request turned out
+    finalizable = crmd_join_phase_count(crm_join_integrated)
+                  + crmd_join_phase_count(crm_join_nack)
+                  + crmd_join_phase_count(crm_join_nack_quiet);
+
+    crm_debug("Notifying %d node%s of join-%d results",
+              finalizable, pcmk__plural_s(finalizable), current_join_id);
+    g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Handle the result of a node history update made after a join
+ *        confirmation
+ *
+ * \param[in] msg        CIB message, logged on failure
+ * \param[in] call_id    ID of the CIB call, for logging
+ * \param[in] rc         Result of the CIB call
+ * \param[in] output     Not referenced directly in this function
+ * \param[in] user_data  Not referenced directly in this function
+ */
+static void
+join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+    // NOTE(review): presumably referenced by name inside register_fsa_error()
+    fsa_data_t *msg_data = NULL;
+
+    if (rc != pcmk_ok) {
+        crm_err("join-%d node history update (via CIB call %d) failed: %s "
+                "(next transition may determine resource status incorrectly)",
+                current_join_id, call_id, pcmk_strerror(rc));
+        crm_log_xml_debug(msg, "failed");
+        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
+        return;
+    }
+
+    crm_debug("join-%d node history update (via CIB call %d) complete",
+              current_join_id, call_id);
+    check_join_state(controld_globals.fsa_state, __func__);
+}
+
+/*!
+ * \brief Process a node's confirmation of its join (A_DC_JOIN_PROCESS_ACK)
+ *
+ * After validating the message, mark the node as confirmed and replace its
+ * node history in the CIB. For the local node the executor state is queried
+ * freshly (falling back to the message contents if the query fails); for any
+ * other node the XML sent with the confirmation is used.
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Not referenced directly in this function
+ * \param[in] cur_state      Not referenced directly in this function
+ * \param[in] current_input  Not referenced directly in this function
+ * \param[in] msg_data       FSA data (presumably read by fsa_typed_data())
+ */
+void
+do_dc_join_ack(long long action,
+               enum crmd_fsa_cause cause,
+               enum crmd_fsa_state cur_state,
+               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    int join_id = -1;
+    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
+    enum controld_section_e section = controld_section_lrm;
+    const int cib_opts = cib_scope_local|cib_can_create;
+
+    const char *op = NULL;
+    const char *join_from = NULL;
+    crm_node_t *peer = NULL;
+
+    if (join_ack == NULL) {
+        // fsa_typed_data() already logged an error
+        return;
+    }
+
+    op = crm_element_value(join_ack->msg, F_CRM_TASK);
+    join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
+
+    // Sanity checks
+    if (join_from == NULL) {
+        crm_warn("Ignoring message received without node identification");
+        return;
+    }
+    if (op == NULL) {
+        crm_warn("Ignoring message received from %s without task", join_from);
+        return;
+    }
+
+    if (strcmp(op, CRM_OP_JOIN_CONFIRM) != 0) {
+        crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
+                  op, join_from, CRM_OP_JOIN_CONFIRM);
+        return;
+    }
+
+    if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
+        crm_warn("Ignoring join confirmation from %s without valid join ID",
+                 join_from);
+        return;
+    }
+
+    // Only a node that we have finalized may confirm
+    peer = crm_get_peer(0, join_from);
+    if (peer->join != crm_join_finalized) {
+        crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
+                 "(currently %s not %s)",
+                 join_id, join_from, crm_join_phase_str(peer->join),
+                 crm_join_phase_str(crm_join_finalized));
+        return;
+    }
+
+    if (join_id != current_join_id) {
+        crm_err("Rejecting join-%d confirmation from %s "
+                "because currently on join-%d",
+                join_id, join_from, current_join_id);
+        crm_update_peer_join(__func__, peer, crm_join_nack);
+        return;
+    }
+
+    crm_update_peer_join(__func__, peer, crm_join_confirmed);
+
+    /* Update CIB with node's current executor state. A new transition will be
+     * triggered later, when the CIB notifies us of the change.
+     */
+    if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+        section = controld_section_lrm_unlocked;
+    }
+    controld_delete_node_state(join_from, section, cib_scope_local);
+    if (pcmk__str_eq(join_from, controld_globals.our_nodename,
+                     pcmk__str_casei)) {
+        xmlNode *now_dc_lrmd_state = controld_query_executor_state();
+
+        if (now_dc_lrmd_state != NULL) {
+            crm_debug("Updating local node history for join-%d "
+                      "from query result", join_id);
+            controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts,
+                                join_update_complete_callback);
+            free_xml(now_dc_lrmd_state);
+        } else {
+            crm_warn("Updating local node history from join-%d confirmation "
+                     "because query failed", join_id);
+            controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+                                join_update_complete_callback);
+        }
+    } else {
+        crm_debug("Updating node history for %s from join-%d confirmation",
+                  join_from, join_id);
+        controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
+                            join_update_complete_callback);
+    }
+}
+
+/*!
+ * \brief Tell one node the result of its join request (usable as a GHFunc)
+ *
+ * Only nodes in the integrated, nack, or quiet-nack phase are handled. The
+ * node's name and UUID are written to the CIB nodes section first. A
+ * quiet-nacked node gets no message, and an inactive node is marked as
+ * pending instead of being messaged. Otherwise an ack (integrated) or nack is
+ * sent; an ack also carries the known remote nodes and their connection
+ * hosts.
+ *
+ * \param[in]     key        Ignored
+ * \param[in,out] value      Node (crm_node_t *) to notify
+ * \param[in]     user_data  Ignored
+ */
+void
+finalize_join_for(gpointer key, gpointer value, gpointer user_data)
+{
+    crm_node_t *join_node = value;
+    const char *join_to = join_node->uname;
+    const bool integrated = (join_node->join == crm_join_integrated);
+    xmlNode *node_entry = NULL;
+    xmlNode *reply = NULL;
+
+    if (!integrated
+        && (join_node->join != crm_join_nack)
+        && (join_node->join != crm_join_nack_quiet)) {
+        crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
+                  "for join-%d", join_to,
+                  crm_join_phase_str(join_node->join), current_join_id);
+        return;
+    }
+
+    /* Update the <node> element with the node's name and UUID, in case they
+     * weren't known before
+     */
+    crm_trace("Updating node name and UUID in CIB for %s", join_to);
+    node_entry = create_xml_node(NULL, XML_CIB_TAG_NODE);
+    set_uuid(node_entry, XML_ATTR_ID, join_node);
+    crm_xml_add(node_entry, XML_ATTR_UNAME, join_to);
+    fsa_cib_anon_update(XML_CIB_TAG_NODES, node_entry);
+    free_xml(node_entry);
+
+    if (join_node->join == crm_join_nack_quiet) {
+        crm_trace("Not sending nack message to node %s with feature set older "
+                  "than 3.17.0", join_to);
+        return;
+    }
+
+    join_node = crm_get_peer(0, join_to);
+    if (!crm_is_peer_active(join_node)) {
+        /* NACK'ing nodes that the membership layer doesn't know about yet
+         * simply creates more churn. Better to leave them waiting and let the
+         * join restart when the new membership event comes in. All other
+         * NACKs (due to versions etc) should still be processed.
+         */
+        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
+        return;
+    }
+
+    // Acknowledge or nack node's join request
+    crm_debug("%sing join-%d request from %s",
+              integrated? "Acknowledg" : "Nack", current_join_id, join_to);
+    reply = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
+    pcmk__xe_set_bool_attr(reply, CRM_OP_JOIN_ACKNAK, integrated);
+
+    if (integrated) {
+        // No change needed for a nacked node
+        crm_update_peer_join(__func__, join_node, crm_join_finalized);
+        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
+
+        /* Add each remote node with a known connection host to the ACK
+         * message. This keeps new controllers in sync with what has already
+         * happened.
+         */
+        if (crm_remote_peer_cache_size() != 0) {
+            GHashTableIter iter;
+            crm_node_t *node = NULL;
+            xmlNode *remotes = create_xml_node(reply, XML_CIB_TAG_NODES);
+
+            g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+                if (node->conn_host != NULL) {
+                    xmlNode *remote = create_xml_node(remotes,
+                                                      XML_CIB_TAG_NODE);
+
+                    pcmk__xe_set_props(remote,
+                                       XML_ATTR_ID, node->uname,
+                                       XML_CIB_TAG_STATE, node->state,
+                                       PCMK__XA_CONN_HOST, node->conn_host,
+                                       NULL);
+                }
+            }
+        }
+    }
+
+    send_cluster_message(join_node, crm_msg_crmd, reply, TRUE);
+    free_xml(reply);
+}
+
+/*!
+ * \brief Check whether the join process can move on, raising FSA input if so
+ *
+ * - If the membership has changed since offers were made, I_NODE_JOIN is
+ *   registered (at most once per membership sequence) and FALSE is returned.
+ * - In S_INTEGRATION, I_INTEGRATED is registered once no node is still in the
+ *   welcomed phase.
+ * - In S_FINALIZE_JOIN, I_FINALIZED is registered once we have the CIB and no
+ *   node is still welcomed, integrated, or finalized.
+ *
+ * \param[in] cur_state  FSA state to evaluate
+ * \param[in] source     Caller name, for logging only
+ *
+ * \return TRUE if I_INTEGRATED or I_FINALIZED was registered, or if
+ *         finalization is delayed because we do not have the CIB yet;
+ *         otherwise FALSE
+ */
+gboolean
+check_join_state(enum crmd_fsa_state cur_state, const char *source)
+{
+    static unsigned long long highest_seq = 0;
+    int count = 0;
+
+    if (controld_globals.membership_id != crm_peer_seq) {
+        crm_debug("join-%d: Membership changed from %llu to %llu "
+                  CRM_XS " highest=%llu state=%s for=%s",
+                  current_join_id, controld_globals.membership_id, crm_peer_seq,
+                  highest_seq, fsa_state2string(cur_state), source);
+        if (highest_seq < crm_peer_seq) {
+            /* Don't spam the FSA with duplicates */
+            highest_seq = crm_peer_seq;
+            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+        }
+        return FALSE;
+    }
+
+    if (cur_state == S_INTEGRATION) {
+        if (crmd_join_phase_count(crm_join_welcomed) != 0) {
+            return FALSE;
+        }
+
+        count = crmd_join_phase_count(crm_join_integrated);
+        crm_debug("join-%d: Integration of %d peer%s complete "
+                  CRM_XS " state=%s for=%s",
+                  current_join_id, count, pcmk__plural_s(count),
+                  fsa_state2string(cur_state), source);
+        register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
+        return TRUE;
+    }
+
+    if (cur_state != S_FINALIZE_JOIN) {
+        return FALSE;
+    }
+
+    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+        crm_debug("join-%d: Delaying finalization until we have CIB "
+                  CRM_XS " state=%s for=%s",
+                  current_join_id, fsa_state2string(cur_state), source);
+        return TRUE;
+    }
+
+    // Any node still in an intermediate phase blocks completion
+    count = crmd_join_phase_count(crm_join_welcomed);
+    if (count != 0) {
+        crm_debug("join-%d: Still waiting on %d welcomed node%s "
+                  CRM_XS " state=%s for=%s",
+                  current_join_id, count, pcmk__plural_s(count),
+                  fsa_state2string(cur_state), source);
+        crmd_join_phase_log(LOG_DEBUG);
+        return FALSE;
+    }
+
+    count = crmd_join_phase_count(crm_join_integrated);
+    if (count != 0) {
+        crm_debug("join-%d: Still waiting on %d integrated node%s "
+                  CRM_XS " state=%s for=%s",
+                  current_join_id, count, pcmk__plural_s(count),
+                  fsa_state2string(cur_state), source);
+        crmd_join_phase_log(LOG_DEBUG);
+        return FALSE;
+    }
+
+    count = crmd_join_phase_count(crm_join_finalized);
+    if (count != 0) {
+        crm_debug("join-%d: Still waiting on %d finalized node%s "
+                  CRM_XS " state=%s for=%s",
+                  current_join_id, count, pcmk__plural_s(count),
+                  fsa_state2string(cur_state), source);
+        crmd_join_phase_log(LOG_DEBUG);
+        return FALSE;
+    }
+
+    crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
+              current_join_id, fsa_state2string(cur_state), source);
+    register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
+    return TRUE;
+}
+
+/*!
+ * \brief Refresh the quorum information once the join has been finalized
+ *
+ * \param[in] action         Not referenced directly in this function
+ * \param[in] cause          Not referenced directly in this function
+ * \param[in] cur_state      Not referenced directly in this function
+ * \param[in] current_input  Not referenced directly in this function
+ * \param[in] msg_data       Not referenced directly in this function
+ */
+void
+do_dc_join_final(long long action, enum crmd_fsa_cause cause,
+                 enum crmd_fsa_state cur_state,
+                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
+
+    // Pass the current quorum state along with TRUE as the second argument
+    crm_update_quorum(crm_have_quorum, TRUE);
+}
+
+/*!
+ * \brief Count the cluster peers that are currently in a given join phase
+ *
+ * \param[in] phase  Join phase to look for
+ *
+ * \return Number of nodes in the peer cache whose join phase is \p phase
+ */
+int
+crmd_join_phase_count(enum crm_join_phase phase)
+{
+    GHashTableIter iter;
+    gpointer entry = NULL;
+    int matches = 0;
+
+    g_hash_table_iter_init(&iter, crm_peer_cache);
+    while (g_hash_table_iter_next(&iter, NULL, &entry)) {
+        const crm_node_t *peer = entry;
+
+        if (peer->join == phase) {
+            matches++;
+        }
+    }
+    return matches;
+}
+
+/*!
+ * \brief Log the join phase of every node in the peer cache
+ *
+ * \param[in] level  Log level to use for each message
+ */
+void
+crmd_join_phase_log(int level)
+{
+    GHashTableIter iter;
+    gpointer entry = NULL;
+
+    g_hash_table_iter_init(&iter, crm_peer_cache);
+    while (g_hash_table_iter_next(&iter, NULL, &entry)) {
+        const crm_node_t *peer = entry;
+
+        do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
+                   crm_join_phase_str(peer->join));
+    }
+}