author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-17 06:53:20 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-17 06:53:20 +0000
commit     e5a812082ae033afb1eed82c0f2df3d0f6bdc93f
tree       a6716c9275b4b413f6c9194798b34b91affb3cc7 /lib/cluster/membership.c
parent     Initial commit.
Adding upstream version 2.1.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/cluster/membership.c')
-rw-r--r--  lib/cluster/membership.c | 1301
1 file changed, 1301 insertions, 0 deletions
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c new file mode 100644 index 0000000..0c54f19 --- /dev/null +++ b/lib/cluster/membership.c @@ -0,0 +1,1301 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include <sys/param.h> +#include <sys/types.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <glib.h> +#include <crm/common/ipc.h> +#include <crm/common/xml_internal.h> +#include <crm/cluster/internal.h> +#include <crm/msg_xml.h> +#include <crm/stonith-ng.h> +#include "crmcluster_private.h" + +/* The peer cache remembers cluster nodes that have been seen. + * This is managed mostly automatically by libcluster, based on + * cluster membership events. + * + * Because cluster nodes can have conflicting names or UUIDs, + * the hash table key is a uniquely generated ID. + */ +GHashTable *crm_peer_cache = NULL; + +/* + * The remote peer cache tracks pacemaker_remote nodes. While the + * value has the same type as the peer cache's, it is tracked separately for + * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, + * so the name (which is also the UUID) is used as the hash table key; there + * is no equivalent of membership events, so management is not automatic; and + * most users of the peer cache need to exclude pacemaker_remote nodes. + * + * That said, using a single cache would be more logical and less error-prone, + * so it would be a good idea to merge them one day. + * + * libcluster provides two avenues for populating the cache: + * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, + * while crm_remote_peer_cache_refresh() populates it via the CIB. + */ +GHashTable *crm_remote_peer_cache = NULL; + +/* + * The known node cache tracks cluster and remote nodes that have been seen in + * the CIB. It is useful mainly when a caller needs to know about a node that + * may no longer be in the membership, but doesn't want to add the node to the + * main peer cache tables. + */ +static GHashTable *known_node_cache = NULL; + +unsigned long long crm_peer_seq = 0; +gboolean crm_have_quorum = FALSE; +static gboolean crm_autoreap = TRUE; + +// Flag setting and clearing for crm_node_t:flags + +#define set_peer_flags(peer, flags_to_set) do { \ + (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ + "Peer", (peer)->uname, \ + (peer)->flags, (flags_to_set), \ + #flags_to_set); \ + } while (0) + +#define clear_peer_flags(peer, flags_to_clear) do { \ + (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ + LOG_TRACE, \ + "Peer", (peer)->uname, \ + (peer)->flags, (flags_to_clear), \ + #flags_to_clear); \ + } while (0) + +static void update_peer_uname(crm_node_t *node, const char *uname); + +int +crm_remote_peer_cache_size(void) +{ + if (crm_remote_peer_cache == NULL) { + return 0; + } + return g_hash_table_size(crm_remote_peer_cache); +} + +/*! 
+ * \brief Get a remote node peer cache entry, creating it if necessary + * + * \param[in] node_name Name of remote node + * + * \return Cache entry for node on success, NULL (and set errno) otherwise + * + * \note When creating a new entry, this will leave the node state undetermined, + * so the caller should also call pcmk__update_peer_state() if the state + * is known. + */ +crm_node_t * +crm_remote_peer_get(const char *node_name) +{ + crm_node_t *node; + + if (node_name == NULL) { + errno = -EINVAL; + return NULL; + } + + /* Return existing cache entry if one exists */ + node = g_hash_table_lookup(crm_remote_peer_cache, node_name); + if (node) { + return node; + } + + /* Allocate a new entry */ + node = calloc(1, sizeof(crm_node_t)); + if (node == NULL) { + return NULL; + } + + /* Populate the essential information */ + set_peer_flags(node, crm_remote_node); + node->uuid = strdup(node_name); + if (node->uuid == NULL) { + free(node); + errno = -ENOMEM; + return NULL; + } + + /* Add the new entry to the cache */ + g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); + crm_trace("added %s to remote cache", node_name); + + /* Update the entry's uname, ensuring peer status callbacks are called */ + update_peer_uname(node, node_name); + return node; +} + +void +crm_remote_peer_cache_remove(const char *node_name) +{ + if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { + crm_trace("removed %s from remote peer cache", node_name); + } +} + +/*! + * \internal + * \brief Return node status based on a CIB status entry + * + * \param[in] node_state XML of node state + * + * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state, + * CRM_NODE_MEMBER otherwise + * \note Unlike most boolean XML attributes, this one defaults to true, for + * backward compatibility with older controllers that don't set it. + */ +static const char * +remote_state_from_cib(const xmlNode *node_state) +{ + bool status = false; + + if (pcmk__xe_get_bool_attr(node_state, XML_NODE_IN_CLUSTER, &status) == pcmk_rc_ok && !status) { + return CRM_NODE_LOST; + } else { + return CRM_NODE_MEMBER; + } +} + +/* user data for looping through remote node xpath searches */ +struct refresh_data { + const char *field; /* XML attribute to check for node name */ + gboolean has_state; /* whether to update node state based on XML */ +}; + +/*! 
+ * \internal + * \brief Process one pacemaker_remote node xpath search result + * + * \param[in] result XML search result + * \param[in] user_data what to look for in the XML + */ +static void +remote_cache_refresh_helper(xmlNode *result, void *user_data) +{ + const struct refresh_data *data = user_data; + const char *remote = crm_element_value(result, data->field); + const char *state = NULL; + crm_node_t *node; + + CRM_CHECK(remote != NULL, return); + + /* Determine node's state, if the result has it */ + if (data->has_state) { + state = remote_state_from_cib(result); + } + + /* Check whether cache already has entry for node */ + node = g_hash_table_lookup(crm_remote_peer_cache, remote); + + if (node == NULL) { + /* Node is not in cache, so add a new entry for it */ + node = crm_remote_peer_get(remote); + CRM_ASSERT(node); + if (state) { + pcmk__update_peer_state(__func__, node, state, 0); + } + + } else if (pcmk_is_set(node->flags, crm_node_dirty)) { + /* Node is in cache and hasn't been updated already, so mark it clean */ + clear_peer_flags(node, crm_node_dirty); + if (state) { + pcmk__update_peer_state(__func__, node, state, 0); + } + } +} + +static void +mark_dirty(gpointer key, gpointer value, gpointer user_data) +{ + set_peer_flags((crm_node_t *) value, crm_node_dirty); +} + +static gboolean +is_dirty(gpointer key, gpointer value, gpointer user_data) +{ + return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty); +} + +/*! + * \brief Repopulate the remote peer cache based on CIB XML + * + * \param[in] xmlNode CIB XML to parse + */ +void +crm_remote_peer_cache_refresh(xmlNode *cib) +{ + struct refresh_data data; + + crm_peer_init(); + + /* First, we mark all existing cache entries as dirty, + * so that later we can remove any that weren't in the CIB. + * We don't empty the cache, because we need to detect changes in state. + */ + g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); + + /* Look for guest nodes and remote nodes in the status section */ + data.field = "id"; + data.has_state = TRUE; + crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, + remote_cache_refresh_helper, &data); + + /* Look for guest nodes and remote nodes in the configuration section, + * because they may have just been added and not have a status entry yet. + * In that case, the cached node state will be left NULL, so that the + * peer status callback isn't called until we're sure the node started + * successfully. + */ + data.field = "value"; + data.has_state = FALSE; + crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, + remote_cache_refresh_helper, &data); + data.field = "id"; + data.has_state = FALSE; + crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, + remote_cache_refresh_helper, &data); + + /* Remove all old cache entries that weren't seen in the CIB */ + g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); +} + +gboolean +crm_is_peer_active(const crm_node_t * node) +{ + if(node == NULL) { + return FALSE; + } + + if (pcmk_is_set(node->flags, crm_remote_node)) { + /* remote nodes are never considered active members. 
This + * guarantees they will never be considered for DC membership.*/ + return FALSE; + } +#if SUPPORT_COROSYNC + if (is_corosync_cluster()) { + return crm_is_corosync_peer_active(node); + } +#endif + crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); + return FALSE; +} + +static gboolean +crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) +{ + crm_node_t *node = value; + crm_node_t *search = user_data; + + if (search == NULL) { + return FALSE; + + } else if (search->id && node->id != search->id) { + return FALSE; + + } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + return FALSE; + + } else if (crm_is_peer_active(value) == FALSE) { + crm_info("Removing node with name %s and id %u from membership cache", + (node->uname? node->uname : "unknown"), node->id); + return TRUE; + } + return FALSE; +} + +/*! + * \brief Remove all peer cache entries matching a node ID and/or uname + * + * \param[in] id ID of node to remove (or 0 to ignore) + * \param[in] name Uname of node to remove (or NULL to ignore) + * + * \return Number of cache entries removed + */ +guint +reap_crm_member(uint32_t id, const char *name) +{ + int matches = 0; + crm_node_t search = { 0, }; + + if (crm_peer_cache == NULL) { + crm_trace("Membership cache not initialized, ignoring purge request"); + return 0; + } + + search.id = id; + pcmk__str_update(&search.uname, name); + matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); + if(matches) { + crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache", + matches, pcmk__plural_s(matches), search.id, + (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + + } else { + crm_info("No peers with id=%u%s%s to purge from the membership cache", + search.id, (search.uname? " and/or uname=" : ""), + (search.uname? 
search.uname : "")); + } + + free(search.uname); + return matches; +} + +static void +count_peer(gpointer key, gpointer value, gpointer user_data) +{ + guint *count = user_data; + crm_node_t *node = value; + + if (crm_is_peer_active(node)) { + *count = *count + 1; + } +} + +guint +crm_active_peers(void) +{ + guint count = 0; + + if (crm_peer_cache) { + g_hash_table_foreach(crm_peer_cache, count_peer, &count); + } + return count; +} + +static void +destroy_crm_node(gpointer data) +{ + crm_node_t *node = data; + + crm_trace("Destroying entry for node %u: %s", node->id, node->uname); + + free(node->uname); + free(node->state); + free(node->uuid); + free(node->expected); + free(node->conn_host); + free(node); +} + +void +crm_peer_init(void) +{ + if (crm_peer_cache == NULL) { + crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); + } + + if (crm_remote_peer_cache == NULL) { + crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); + } + + if (known_node_cache == NULL) { + known_node_cache = pcmk__strikey_table(free, destroy_crm_node); + } +} + +void +crm_peer_destroy(void) +{ + if (crm_peer_cache != NULL) { + crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); + g_hash_table_destroy(crm_peer_cache); + crm_peer_cache = NULL; + } + + if (crm_remote_peer_cache != NULL) { + crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); + g_hash_table_destroy(crm_remote_peer_cache); + crm_remote_peer_cache = NULL; + } + + if (known_node_cache != NULL) { + crm_trace("Destroying known node cache with %d members", + g_hash_table_size(known_node_cache)); + g_hash_table_destroy(known_node_cache); + known_node_cache = NULL; + } + +} + +static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, + const void *) = NULL; + +/*! + * \brief Set a client function that will be called after peer status changes + * + * \param[in] dispatch Pointer to function to use as callback + * + * \note Previously, client callbacks were responsible for peer cache + * management. This is no longer the case, and client callbacks should do + * only client-specific handling. Callbacks MUST NOT add or remove entries + * in the peer caches. + */ +void +crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) +{ + peer_status_callback = dispatch; +} + +/*! + * \brief Tell the library whether to automatically reap lost nodes + * + * If TRUE (the default), calling crm_update_peer_proc() will also update the + * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() + * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. + * Callers should leave this enabled unless they plan to manage the cache + * separately on their own. + * + * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable + */ +void +crm_set_autoreap(gboolean autoreap) +{ + crm_autoreap = autoreap; +} + +static void +dump_peer_hash(int level, const char *caller) +{ + GHashTableIter iter; + const char *id = NULL; + crm_node_t *node = NULL; + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { + do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); + } +} + +static gboolean +hash_find_by_data(gpointer key, gpointer value, gpointer user_data) +{ + return value == user_data; +} + +/*! 
+ * \internal + * \brief Search caches for a node (cluster or Pacemaker Remote) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] flags Bitmask of enum crm_get_peer_flags + * + * \return Node cache entry if found, otherwise NULL + */ +crm_node_t * +pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) +{ + crm_node_t *node = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + crm_peer_init(); + + if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + + if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { + node = pcmk__search_cluster_node_cache(id, uname); + } + return node; +} + +/*! + * \brief Get a node cache entry (cluster or Pacemaker Remote) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] flags Bitmask of enum crm_get_peer_flags + * + * \return (Possibly newly created) node cache entry + */ +crm_node_t * +crm_get_peer_full(unsigned int id, const char *uname, int flags) +{ + crm_node_t *node = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + crm_peer_init(); + + if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + + if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { + node = crm_get_peer(id, uname); + } + return node; +} + +/*! + * \internal + * \brief Search cluster node cache + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * + * \return Cluster node cache entry if found, otherwise NULL + */ +crm_node_t * +pcmk__search_cluster_node_cache(unsigned int id, const char *uname) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + crm_node_t *by_id = NULL; + crm_node_t *by_name = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + crm_peer_init(); + + if (uname != NULL) { + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if(node->uname && strcasecmp(node->uname, uname) == 0) { + crm_trace("Name match: %s = %p", node->uname, node); + by_name = node; + break; + } + } + } + + if (id > 0) { + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if(node->id == id) { + crm_trace("ID match: %u = %p", node->id, node); + by_id = node; + break; + } + } + } + + node = by_id; /* Good default */ + if(by_id == by_name) { + /* Nothing to do if they match (both NULL counts) */ + crm_trace("Consistent: %p for %u/%s", by_id, id, uname); + + } else if(by_id == NULL && by_name) { + crm_trace("Only one: %p for %u/%s", by_name, id, uname); + + if(id && by_name->id) { + dump_peer_hash(LOG_WARNING, __func__); + crm_crit("Node %u and %u share the same name '%s'", + id, by_name->id, uname); + node = NULL; /* Create a new one */ + + } else { + node = by_name; + } + + } else if(by_name == NULL && by_id) { + crm_trace("Only one: %p for %u/%s", by_id, id, uname); + + if(uname && by_id->uname) { + dump_peer_hash(LOG_WARNING, __func__); + crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", + uname, by_id->uname, id, uname); + } + + } else if(uname && by_id->uname) { + if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { + crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); + 
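            /* Editorial aside (not in the upstream file): by_name is the
+             * stale cache entry still carrying the old ID; hash_find_by_data()
+             * matches it by pointer, leaving by_id as the sole cache entry
+             * for this uname. */
+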
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); + + } else { + crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); + dump_peer_hash(LOG_INFO, __func__); + crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, + TRUE); + } + + } else if(id && by_name->id) { + crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); + + } else { + /* Simple merge */ + + /* Only corosync-based clusters use node IDs. The functions that call + * pcmk__update_peer_state() and crm_update_peer_proc() only know + * nodeid, so 'by_id' is authoritative when merging. + */ + dump_peer_hash(LOG_DEBUG, __func__); + + crm_info("Merging %p into %p", by_name, by_id); + g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); + } + + return node; +} + +#if SUPPORT_COROSYNC +static guint +remove_conflicting_peer(crm_node_t *node) +{ + int matches = 0; + GHashTableIter iter; + crm_node_t *existing_node = NULL; + + if (node->id == 0 || node->uname == NULL) { + return 0; + } + + if (!pcmk__corosync_has_nodelist()) { + return 0; + } + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { + if (existing_node->id > 0 + && existing_node->id != node->id + && existing_node->uname != NULL + && strcasecmp(existing_node->uname, node->uname) == 0) { + + if (crm_is_peer_active(existing_node)) { + continue; + } + + crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", + existing_node->id, existing_node->uname, node->id); + + g_hash_table_iter_remove(&iter); + matches++; + } + } + + return matches; +} +#endif + +/*! + * \brief Get a cluster node cache entry + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * + * \return (Possibly newly created) cluster node cache entry + */ +/* coverity[-alloc] Memory is referenced in one or both hashtables */ +crm_node_t * +crm_get_peer(unsigned int id, const char *uname) +{ + crm_node_t *node = NULL; + char *uname_lookup = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + crm_peer_init(); + + node = pcmk__search_cluster_node_cache(id, uname); + + /* if uname wasn't provided, and find_peer did not turn up a uname based on id. + * we need to do a lookup of the node name using the id in the cluster membership. */ + if ((node == NULL || node->uname == NULL) && (uname == NULL)) { + uname_lookup = get_node_name(id); + } + + if (uname_lookup) { + uname = uname_lookup; + crm_trace("Inferred a name of '%s' for node %u", uname, id); + + /* try to turn up the node one more time now that we know the uname. 
*/ + if (node == NULL) { + node = pcmk__search_cluster_node_cache(id, uname); + } + } + + + if (node == NULL) { + char *uniqueid = crm_generate_uuid(); + + node = calloc(1, sizeof(crm_node_t)); + CRM_ASSERT(node); + + crm_info("Created entry %s/%p for node %s/%u (%d total)", + uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); + g_hash_table_replace(crm_peer_cache, uniqueid, node); + } + + if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { + crm_info("Node %u is now known as %s", id, uname); + } + + if(id > 0 && node->id == 0) { + node->id = id; + } + + if (uname && (node->uname == NULL)) { + update_peer_uname(node, uname); + } + + if(node->uuid == NULL) { + const char *uuid = crm_peer_uuid(node); + + if (uuid) { + crm_info("Node %u has uuid %s", id, uuid); + + } else { + crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); + } + } + + free(uname_lookup); + + return node; +} + +/*! + * \internal + * \brief Update a node's uname + * + * \param[in,out] node Node object to update + * \param[in] uname New name to set + * + * \note This function should not be called within a peer cache iteration, + * because in some cases it can remove conflicting cache entries, + * which would invalidate the iterator. + */ +static void +update_peer_uname(crm_node_t *node, const char *uname) +{ + CRM_CHECK(uname != NULL, + crm_err("Bug: can't update node name without name"); return); + CRM_CHECK(node != NULL, + crm_err("Bug: can't update node name to %s without node", uname); + return); + + if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { + crm_debug("Node uname '%s' did not change", uname); + return; + } + + for (const char *c = uname; *c; ++c) { + if ((*c >= 'A') && (*c <= 'Z')) { + crm_warn("Node names with capitals are discouraged, consider changing '%s'", + uname); + break; + } + } + + pcmk__str_update(&node->uname, uname); + + if (peer_status_callback != NULL) { + peer_status_callback(crm_status_uname, node, NULL); + } + +#if SUPPORT_COROSYNC + if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { + remove_conflicting_peer(node); + } +#endif +} + +/*! + * \internal + * \brief Get log-friendly string equivalent of a process flag + * + * \param[in] proc Process flag + * + * \return Log-friendly string equivalent of \p proc + */ +static inline const char * +proc2text(enum crm_proc_flag proc) +{ + const char *text = "unknown"; + + switch (proc) { + case crm_proc_none: + text = "none"; + break; + case crm_proc_based: + text = "pacemaker-based"; + break; + case crm_proc_controld: + text = "pacemaker-controld"; + break; + case crm_proc_schedulerd: + text = "pacemaker-schedulerd"; + break; + case crm_proc_execd: + text = "pacemaker-execd"; + break; + case crm_proc_attrd: + text = "pacemaker-attrd"; + break; + case crm_proc_fenced: + text = "pacemaker-fenced"; + break; + case crm_proc_cpg: + text = "corosync-cpg"; + break; + } + return text; +} + +/*! + * \internal + * \brief Update a node's process information (and potentially state) + * + * \param[in] source Caller's function name (for log messages) + * \param[in,out] node Node object to update + * \param[in] flag Bitmask of new process information + * \param[in] status node status (online, offline, etc.) + * + * \return NULL if any node was reaped from peer caches, value of node otherwise + * + * \note If this function returns NULL, the supplied node object was likely + * freed and should not be used again. 
This function should not be + * called within a cache iteration if reaping is possible, otherwise + * reaping could invalidate the iterator. + */ +crm_node_t * +crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) +{ + uint32_t last = 0; + gboolean changed = FALSE; + + CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", + source, proc2text(flag), status); + return NULL); + + /* Pacemaker doesn't spawn processes on remote nodes */ + if (pcmk_is_set(node->flags, crm_remote_node)) { + return node; + } + + last = node->processes; + if (status == NULL) { + node->processes = flag; + if (node->processes != last) { + changed = TRUE; + } + + } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) { + if ((node->processes & flag) != flag) { + node->processes = pcmk__set_flags_as(__func__, __LINE__, + LOG_TRACE, "Peer process", + node->uname, node->processes, + flag, "processes"); + changed = TRUE; + } + + } else if (node->processes & flag) { + node->processes = pcmk__clear_flags_as(__func__, __LINE__, + LOG_TRACE, "Peer process", + node->uname, node->processes, + flag, "processes"); + changed = TRUE; + } + + if (changed) { + if (status == NULL && flag <= crm_proc_none) { + crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, + node->id); + } else { + crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, + proc2text(flag), status); + } + + /* Call the client callback first, then update the peer state, + * in case the node will be reaped + */ + if (peer_status_callback != NULL) { + peer_status_callback(crm_status_processes, node, &last); + } + + /* The client callback shouldn't touch the peer caches, + * but as a safety net, bail if the peer cache was destroyed. + */ + if (crm_peer_cache == NULL) { + return NULL; + } + + if (crm_autoreap) { + const char *peer_state = NULL; + + if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { + peer_state = CRM_NODE_MEMBER; + } else { + peer_state = CRM_NODE_LOST; + } + node = pcmk__update_peer_state(__func__, node, peer_state, 0); + } + } else { + crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, + proc2text(flag), status); + } + return node; +} + +/*! + * \internal + * \brief Update a cluster node cache entry's expected join state + * + * \param[in] source Caller's function name (for logging) + * \param[in,out] node Node to update + * \param[in] expected Node's new join state + */ +void +pcmk__update_peer_expected(const char *source, crm_node_t *node, + const char *expected) +{ + char *last = NULL; + gboolean changed = FALSE; + + CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); + return); + + /* Remote nodes don't participate in joins */ + if (pcmk_is_set(node->flags, crm_remote_node)) { + return; + } + + last = node->expected; + if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) { + node->expected = strdup(expected); + changed = TRUE; + } + + if (changed) { + crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, + expected, last); + free(last); + } else { + crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, + node->id, expected); + } +} + +/*! 
+ * \internal + * \brief Update a node's state and membership information + * + * \param[in] source Caller's function name (for log messages) + * \param[in,out] node Node object to update + * \param[in] state Node's new state + * \param[in] membership Node's new membership ID + * \param[in,out] iter If not NULL, pointer to node's peer cache iterator + * + * \return NULL if any node was reaped, value of node otherwise + * + * \note If this function returns NULL, the supplied node object was likely + * freed and should not be used again. This function may be called from + * within a peer cache iteration if the iterator is supplied. + */ +static crm_node_t * +update_peer_state_iter(const char *source, crm_node_t *node, const char *state, + uint64_t membership, GHashTableIter *iter) +{ + gboolean is_member; + + CRM_CHECK(node != NULL, + crm_err("Could not set state for unknown host to %s" + CRM_XS " source=%s", state, source); + return NULL); + + is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei); + if (is_member) { + node->when_lost = 0; + if (membership) { + node->last_seen = membership; + } + } + + if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) { + char *last = node->state; + + node->state = strdup(state); + crm_notice("Node %s state is now %s " CRM_XS + " nodeid=%u previous=%s source=%s", node->uname, state, + node->id, (last? last : "unknown"), source); + if (peer_status_callback != NULL) { + peer_status_callback(crm_status_nstate, node, last); + } + free(last); + + if (crm_autoreap && !is_member + && !pcmk_is_set(node->flags, crm_remote_node)) { + /* We only autoreap from the peer cache, not the remote peer cache, + * because the latter should be managed only by + * crm_remote_peer_cache_refresh(). + */ + if(iter) { + crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); + g_hash_table_iter_remove(iter); + + } else { + reap_crm_member(node->id, node->uname); + } + node = NULL; + } + + } else { + crm_trace("Node %s state is unchanged (%s) " CRM_XS + " nodeid=%u source=%s", node->uname, state, node->id, source); + } + return node; +} + +/*! + * \brief Update a node's state and membership information + * + * \param[in] source Caller's function name (for log messages) + * \param[in,out] node Node object to update + * \param[in] state Node's new state + * \param[in] membership Node's new membership ID + * + * \return NULL if any node was reaped, value of node otherwise + * + * \note If this function returns NULL, the supplied node object was likely + * freed and should not be used again. This function should not be + * called within a cache iteration if reaping is possible, + * otherwise reaping could invalidate the iterator. + */ +crm_node_t * +pcmk__update_peer_state(const char *source, crm_node_t *node, + const char *state, uint64_t membership) +{ + return update_peer_state_iter(source, node, state, membership, NULL); +} + +/*! 
+ * \internal + * \brief Reap all nodes from cache whose membership information does not match + * + * \param[in] membership Membership ID of nodes to keep + */ +void +pcmk__reap_unseen_nodes(uint64_t membership) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + + crm_trace("Reaping unseen nodes..."); + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { + if (node->last_seen != membership) { + if (node->state) { + /* + * Calling update_peer_state_iter() allows us to + * remove the node from crm_peer_cache without + * invalidating our iterator + */ + update_peer_state_iter(__func__, node, CRM_NODE_LOST, + membership, &iter); + + } else { + crm_info("State of node %s[%u] is still unknown", + node->uname, node->id); + } + } + } +} + +static crm_node_t * +find_known_node(const char *id, const char *uname) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + crm_node_t *by_id = NULL; + crm_node_t *by_name = NULL; + + if (uname) { + g_hash_table_iter_init(&iter, known_node_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if (node->uname && strcasecmp(node->uname, uname) == 0) { + crm_trace("Name match: %s = %p", node->uname, node); + by_name = node; + break; + } + } + } + + if (id) { + g_hash_table_iter_init(&iter, known_node_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if(strcasecmp(node->uuid, id) == 0) { + crm_trace("ID match: %s= %p", id, node); + by_id = node; + break; + } + } + } + + node = by_id; /* Good default */ + if (by_id == by_name) { + /* Nothing to do if they match (both NULL counts) */ + crm_trace("Consistent: %p for %s/%s", by_id, id, uname); + + } else if (by_id == NULL && by_name) { + crm_trace("Only one: %p for %s/%s", by_name, id, uname); + + if (id) { + node = NULL; + + } else { + node = by_name; + } + + } else if (by_name == NULL && by_id) { + crm_trace("Only one: %p for %s/%s", by_id, id, uname); + + if (uname) { + node = NULL; + } + + } else if (uname && by_id->uname + && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { + /* Multiple nodes have the same uname in the CIB. + * Return by_id. */ + + } else if (id && by_name->uuid + && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) { + /* Multiple nodes have the same id in the CIB. + * Return by_name. */ + node = by_name; + + } else { + node = NULL; + } + + if (node == NULL) { + crm_debug("Couldn't find node%s%s%s%s", + id? " " : "", + id? id : "", + uname? " with name " : "", + uname? 
uname : ""); + } + + return node; +} + +static void +known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) +{ + const char *id = crm_element_value(xml_node, XML_ATTR_ID); + const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME); + crm_node_t * node = NULL; + + CRM_CHECK(id != NULL && uname !=NULL, return); + node = find_known_node(id, uname); + + if (node == NULL) { + char *uniqueid = crm_generate_uuid(); + + node = calloc(1, sizeof(crm_node_t)); + CRM_ASSERT(node != NULL); + + node->uname = strdup(uname); + CRM_ASSERT(node->uname != NULL); + + node->uuid = strdup(id); + CRM_ASSERT(node->uuid != NULL); + + g_hash_table_replace(known_node_cache, uniqueid, node); + + } else if (pcmk_is_set(node->flags, crm_node_dirty)) { + pcmk__str_update(&node->uname, uname); + + /* Node is in cache and hasn't been updated already, so mark it clean */ + clear_peer_flags(node, crm_node_dirty); + } + +} + +static void +refresh_known_node_cache(xmlNode *cib) +{ + crm_peer_init(); + + g_hash_table_foreach(known_node_cache, mark_dirty, NULL); + + crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, + known_node_cache_refresh_helper, NULL); + + /* Remove all old cache entries that weren't seen in the CIB */ + g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); +} + +void +pcmk__refresh_node_caches_from_cib(xmlNode *cib) +{ + crm_remote_peer_cache_refresh(cib); + refresh_known_node_cache(cib); +} + +/*! + * \internal + * \brief Search known node cache + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] flags Bitmask of enum crm_get_peer_flags + * + * \return Known node cache entry if found, otherwise NULL + */ +crm_node_t * +pcmk__search_known_node_cache(unsigned int id, const char *uname, + uint32_t flags) +{ + crm_node_t *node = NULL; + char *id_str = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + node = pcmk__search_node_caches(id, uname, flags); + + if (node || !(flags & CRM_GET_PEER_CLUSTER)) { + return node; + } + + if (id > 0) { + id_str = crm_strdup_printf("%u", id); + } + + node = find_known_node(id_str, uname); + + free(id_str); + return node; +} + + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include <crm/cluster/compat.h> + +int +crm_terminate_member(int nodeid, const char *uname, void *unused) +{ + return stonith_api_kick(nodeid, uname, 120, TRUE); +} + +int +crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) +{ + return stonith_api_kick(nodeid, uname, 120, TRUE); +} + +// LCOV_EXCL_STOP +// End deprecated API |