diff options
Diffstat (limited to 'lib/cluster/membership.c')
-rw-r--r-- | lib/cluster/membership.c | 790 |
1 files changed, 497 insertions, 293 deletions
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index f856cca..7eedc2e 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2023 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -13,6 +13,7 @@ # define _GNU_SOURCE #endif +#include <inttypes.h> // PRIu32 #include <sys/param.h> #include <sys/types.h> #include <stdio.h> @@ -22,7 +23,7 @@ #include <crm/common/ipc.h> #include <crm/common/xml_internal.h> #include <crm/cluster/internal.h> -#include <crm/msg_xml.h> +#include <crm/common/xml.h> #include <crm/stonith-ng.h> #include "crmcluster_private.h" @@ -32,6 +33,9 @@ * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. + * + * @COMPAT When this is internal, rename to cluster_node_member_cache and make + * static. */ GHashTable *crm_peer_cache = NULL; @@ -47,22 +51,22 @@ GHashTable *crm_peer_cache = NULL; * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: - * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, - * while crm_remote_peer_cache_refresh() populates it via the CIB. + * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node() + * directly manage it, while refresh_remote_nodes() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; /* - * The known node cache tracks cluster and remote nodes that have been seen in + * The CIB cluster node cache tracks cluster nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. 
*/ -static GHashTable *known_node_cache = NULL; +static GHashTable *cluster_node_cib_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; -static gboolean crm_autoreap = TRUE; +static bool autoreap = true; // Flag setting and clearing for crm_node_t:flags @@ -82,46 +86,80 @@ static gboolean crm_autoreap = TRUE; } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); +static crm_node_t *find_cib_cluster_node(const char *id, const char *uname); -int -crm_remote_peer_cache_size(void) +/*! + * \internal + * \brief Get the number of Pacemaker Remote nodes that have been seen + * + * \return Number of cached Pacemaker Remote nodes + */ +unsigned int +pcmk__cluster_num_remote_nodes(void) { if (crm_remote_peer_cache == NULL) { - return 0; + return 0U; } return g_hash_table_size(crm_remote_peer_cache); } /*! - * \brief Get a remote node peer cache entry, creating it if necessary + * \internal + * \brief Get a remote node cache entry, creating it if necessary * * \param[in] node_name Name of remote node * - * \return Cache entry for node on success, NULL (and set errno) otherwise + * \return Cache entry for node on success, or \c NULL (and set \c errno) + * otherwise * - * \note When creating a new entry, this will leave the node state undetermined, - * so the caller should also call pcmk__update_peer_state() if the state + * \note When creating a new entry, this will leave the node state undetermined. + * The caller should also call \c pcmk__update_peer_state() if the state * is known. + * \note Because this can add and remove cache entries, callers should not + * assume any previously obtained cache entry pointers remain valid. 
*/ crm_node_t * -crm_remote_peer_get(const char *node_name) +pcmk__cluster_lookup_remote_node(const char *node_name) { crm_node_t *node; + char *node_name_copy = NULL; if (node_name == NULL) { - errno = -EINVAL; + errno = EINVAL; return NULL; } + /* It's theoretically possible that the node was added to the cluster peer + * cache before it was known to be a Pacemaker Remote node. Remove that + * entry unless it has a node ID, which means the name actually is + * associated with a cluster node. (@TODO return an error in that case?) + */ + node = pcmk__search_node_caches(0, node_name, + pcmk__node_search_cluster_member); + if ((node != NULL) && (node->uuid == NULL)) { + /* node_name could be a pointer into the cache entry being removed, so + * reassign it to a copy before the original gets freed + */ + node_name_copy = strdup(node_name); + if (node_name_copy == NULL) { + errno = ENOMEM; + return NULL; + } + node_name = node_name_copy; + pcmk__cluster_forget_cluster_node(0, node_name); + } + /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { + free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { + free(node_name_copy); return NULL; } @@ -130,7 +168,8 @@ crm_remote_peer_get(const char *node_name) node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); - errno = -ENOMEM; + errno = ENOMEM; + free(node_name_copy); return NULL; } @@ -140,14 +179,28 @@ crm_remote_peer_get(const char *node_name) /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); + free(node_name_copy); return node; } +/*! 
+ * \internal + * \brief Remove a node from the Pacemaker Remote node cache + * + * \param[in] node_name Name of node to remove from cache + * + * \note The caller must be careful not to use \p node_name after calling this + * function if it might be a pointer into the cache entry being removed. + */ void -crm_remote_peer_cache_remove(const char *node_name) +pcmk__cluster_forget_remote_node(const char *node_name) { - if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { - crm_trace("removed %s from remote peer cache", node_name); + /* Do a lookup first, because node_name could be a pointer within the entry + * being removed -- we can't log it *after* removing it. + */ + if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { + crm_trace("Removing %s from Pacemaker Remote node cache", node_name); + g_hash_table_remove(crm_remote_peer_cache, node_name); } } @@ -157,8 +210,8 @@ crm_remote_peer_cache_remove(const char *node_name) * * \param[in] node_state XML of node state * - * \return CRM_NODE_LOST if PCMK__XA_IN_CCM is false in node_state, - * CRM_NODE_MEMBER otherwise + * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in + * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ @@ -208,7 +261,7 @@ remote_cache_refresh_helper(xmlNode *result, void *user_data) if (node == NULL) { /* Node is not in cache, so add a new entry for it */ - node = crm_remote_peer_get(remote); + node = pcmk__cluster_lookup_remote_node(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); @@ -236,16 +289,17 @@ is_dirty(gpointer key, gpointer value, gpointer user_data) } /*! 
- * \brief Repopulate the remote peer cache based on CIB XML + * \internal + * \brief Repopulate the remote node cache based on CIB XML * - * \param[in] xmlNode CIB XML to parse + * \param[in] cib CIB XML to parse */ -void -crm_remote_peer_cache_refresh(xmlNode *cib) +static void +refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; - crm_peer_init(); + pcmk__cluster_init_node_caches(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. @@ -254,7 +308,7 @@ crm_remote_peer_cache_refresh(xmlNode *cib) g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ - data.field = "id"; + data.field = PCMK_XA_ID; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); @@ -265,11 +319,11 @@ crm_remote_peer_cache_refresh(xmlNode *cib) * peer status callback isn't called until we're sure the node started * successfully. */ - data.field = "value"; + data.field = PCMK_XA_VALUE; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); - data.field = "id"; + data.field = PCMK_XA_ID; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); @@ -278,105 +332,183 @@ crm_remote_peer_cache_refresh(xmlNode *cib) g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } -gboolean -crm_is_peer_active(const crm_node_t * node) +/*! + * \internal + * \brief Check whether a node is an active cluster node + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. 
+ * + * \param[in] node Node to check + * + * \return \c true if the node is an active cluster node, or \c false otherwise + */ +bool +pcmk__cluster_is_node_active(const crm_node_t *node) { - if(node == NULL) { - return FALSE; - } + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); - if (pcmk_is_set(node->flags, crm_remote_node)) { - /* remote nodes are never considered active members. This - * guarantees they will never be considered for DC membership.*/ - return FALSE; + if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) { + return false; } + + switch (cluster_layer) { + case pcmk_cluster_layer_corosync: #if SUPPORT_COROSYNC - if (is_corosync_cluster()) { - return crm_is_corosync_peer_active(node); + return pcmk__corosync_is_peer_active(node); +#else + break; +#endif // SUPPORT_COROSYNC + default: + break; } -#endif - crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); - return FALSE; + + crm_err("Unhandled cluster layer: %s", + pcmk_cluster_layer_text(cluster_layer)); + return false; } +/*! + * \internal + * \brief Check if a node's entry should be removed from the cluster node cache + * + * A node should be removed from the cache if it's inactive and matches another + * \c crm_node_t (the search object). The node is considered a mismatch if any + * of the following are true: + * * The search object is \c NULL. + * * The search object has an ID set and the cached node's ID does not match it. + * * The search object does not have an ID set, and the cached node's name does + * not match the search node's name. (If both names are \c NULL, it's a + * match.) + * + * Otherwise, the node is considered a match. + * + * Note that if the search object has both an ID and a name set, the name is + * ignored for matching purposes. 
+ * + * \param[in] key Ignored + * \param[in] value \c crm_node_t object from cluster node cache + * \param[in] user_data \c crm_node_t object to match against (search object) + * + * \return \c TRUE if the node entry should be removed from \c crm_peer_cache, + * or \c FALSE otherwise + */ static gboolean -crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) +should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; - - } else if (search->id && node->id != search->id) { + } + if ((search->id != 0) && (node->id != search->id)) { return FALSE; - - } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + } + if ((search->id == 0) + && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + // @TODO Consider name even if ID is set? + return FALSE; + } + if (pcmk__cluster_is_node_active(value)) { return FALSE; - - } else if (crm_is_peer_active(value) == FALSE) { - crm_info("Removing node with name %s and id %u from membership cache", - (node->uname? node->uname : "unknown"), node->id); - return TRUE; } - return FALSE; + + crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership " + "cache", + pcmk__s(node->uname, "(unknown)"), node->id); + return TRUE; } /*! - * \brief Remove all peer cache entries matching a node ID and/or uname + * \internal + * \brief Remove one or more inactive nodes from the cluster node cache * - * \param[in] id ID of node to remove (or 0 to ignore) - * \param[in] name Uname of node to remove (or NULL to ignore) + * All inactive nodes matching \p id and \p node_name as described in + * \c should_forget_cluster_node documentation are removed from the cache. * - * \return Number of cache entries removed + * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed + * from the cache regardless of ID and name. 
This differs from clearing the + * cache, in that entries for active nodes are preserved. + * + * \param[in] id ID of node to remove from cache (0 to ignore) + * \param[in] node_name Name of node to remove from cache (ignored if \p id is + * nonzero) + * + * \note \p node_name is not modified directly, but it will be freed if it's a + * pointer into a cache entry that is removed. */ -guint -reap_crm_member(uint32_t id, const char *name) +void +pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name) { - int matches = 0; crm_node_t search = { 0, }; + char *criterion = NULL; // For logging + guint matches = 0; if (crm_peer_cache == NULL) { - crm_trace("Membership cache not initialized, ignoring purge request"); - return 0; + crm_trace("Membership cache not initialized, ignoring removal request"); + return; } search.id = id; - pcmk__str_update(&search.uname, name); - matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); - if(matches) { - crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache", - matches, pcmk__plural_s(matches), search.id, - (search.uname? " and/or uname=" : ""), - (search.uname? search.uname : "")); + search.uname = pcmk__str_copy(node_name); // May log after original freed + + if (id > 0) { + criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id); + + } else if (node_name != NULL) { + criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name); + } + + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if (matches > 0) { + if (criterion != NULL) { + crm_notice("Removed %u inactive node%s with %s from the membership " + "cache", + matches, pcmk__plural_s(matches), criterion); + } else { + crm_notice("Removed all (%u) inactive cluster nodes from the " + "membership cache", + matches); + } } else { - crm_info("No peers with id=%u%s%s to purge from the membership cache", - search.id, (search.uname? " and/or uname=" : ""), - (search.uname? 
search.uname : "")); + crm_info("No inactive cluster nodes%s%s to remove from the membership " + "cache", + ((criterion != NULL)? " with " : ""), pcmk__s(criterion, "")); } free(search.uname); - return matches; + free(criterion); } static void count_peer(gpointer key, gpointer value, gpointer user_data) { - guint *count = user_data; + unsigned int *count = user_data; crm_node_t *node = value; - if (crm_is_peer_active(node)) { + if (pcmk__cluster_is_node_active(node)) { *count = *count + 1; } } -guint -crm_active_peers(void) +/*! + * \internal + * \brief Get the number of active cluster nodes that have been seen + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. + * + * \return Number of active nodes in the cluster node cache + */ +unsigned int +pcmk__cluster_num_active_nodes(void) { - guint count = 0; + unsigned int count = 0; - if (crm_peer_cache) { + if (crm_peer_cache != NULL) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; @@ -397,8 +529,12 @@ destroy_crm_node(gpointer data) free(node); } +/*! + * \internal + * \brief Initialize node caches + */ void -crm_peer_init(void) +pcmk__cluster_init_node_caches(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); @@ -408,69 +544,78 @@ crm_peer_init(void) crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } - if (known_node_cache == NULL) { - known_node_cache = pcmk__strikey_table(free, destroy_crm_node); + if (cluster_node_cib_cache == NULL) { + cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node); } } +/*! 
+ * \internal + * \brief Destroy node caches + */ void -crm_peer_destroy(void) +pcmk__cluster_destroy_node_caches(void) { if (crm_peer_cache != NULL) { - crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); + crm_trace("Destroying peer cache with %d members", + g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { - crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); + crm_trace("Destroying remote peer cache with %d members", + pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } - if (known_node_cache != NULL) { - crm_trace("Destroying known node cache with %d members", - g_hash_table_size(known_node_cache)); - g_hash_table_destroy(known_node_cache); - known_node_cache = NULL; + if (cluster_node_cib_cache != NULL) { + crm_trace("Destroying configured cluster node cache with %d members", + g_hash_table_size(cluster_node_cib_cache)); + g_hash_table_destroy(cluster_node_cib_cache); + cluster_node_cib_cache = NULL; } - } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! + * \internal * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * - * \note Previously, client callbacks were responsible for peer cache - * management. This is no longer the case, and client callbacks should do - * only client-specific handling. Callbacks MUST NOT add or remove entries - * in the peer caches. + * \note Client callbacks should do only client-specific handling. Callbacks + * must not add or remove entries in the peer caches. 
*/ void -crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) +pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type, + crm_node_t *, const void *)) { + // @TODO Improve documentation of peer_status_callback peer_status_callback = dispatch; } /*! + * \internal * \brief Tell the library whether to automatically reap lost nodes * - * If TRUE (the default), calling crm_update_peer_proc() will also update the - * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() - * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. + * If \c true (the default), calling \c crm_update_peer_proc() will also update + * the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the + * peer state will reap peers whose state changes to anything other than + * \c CRM_NODE_MEMBER. + * * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * - * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable + * \param[in] enable \c true to enable automatic reaping, \c false to disable */ void -crm_set_autoreap(gboolean autoreap) +pcmk__cluster_set_autoreap(bool enable) { - crm_autoreap = autoreap; + autoreap = enable; } static void @@ -494,82 +639,7 @@ hash_find_by_data(gpointer key, gpointer value, gpointer user_data) /*! 
* \internal - * \brief Search caches for a node (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return Node cache entry if found, otherwise NULL - */ -crm_node_t * -pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) -{ - crm_node_t *node = NULL; - - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - - if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { - node = g_hash_table_lookup(crm_remote_peer_cache, uname); - } - - if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { - node = pcmk__search_cluster_node_cache(id, uname, NULL); - } - return node; -} - -/*! - * \brief Get a node cache entry (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster - * node ID to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return (Possibly newly created) node cache entry - */ -crm_node_t * -pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, - int flags) -{ - crm_node_t *node = NULL; - - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - - if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { - node = g_hash_table_lookup(crm_remote_peer_cache, uname); - } - - if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { - node = pcmk__get_peer(id, uname, uuid); - } - return node; -} - -/*! 
- * \brief Get a node cache entry (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return (Possibly newly created) node cache entry - */ -crm_node_t * -crm_get_peer_full(unsigned int id, const char *uname, int flags) -{ - return pcmk__get_peer_full(id, uname, NULL, flags); -} - -/*! - * \internal - * \brief Search cluster node cache + * \brief Search cluster member node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for @@ -578,9 +648,9 @@ crm_get_peer_full(unsigned int id, const char *uname, int flags) * * \return Cluster node cache entry if found, otherwise NULL */ -crm_node_t * -pcmk__search_cluster_node_cache(unsigned int id, const char *uname, - const char *uuid) +static crm_node_t * +search_cluster_member_cache(unsigned int id, const char *uname, + const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; @@ -589,7 +659,7 @@ pcmk__search_cluster_node_cache(unsigned int id, const char *uname, CRM_ASSERT(id > 0 || uname != NULL); - crm_peer_init(); + pcmk__cluster_init_node_caches(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); @@ -681,6 +751,85 @@ pcmk__search_cluster_node_cache(unsigned int id, const char *uname, return node; } +/*! 
+ * \internal + * \brief Search caches for a node (cluster or Pacemaker Remote) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Node cache entry if found, otherwise NULL + */ +crm_node_t * +pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) +{ + crm_node_t *node = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + pcmk__cluster_init_node_caches(); + + if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + + if ((node == NULL) + && pcmk_is_set(flags, pcmk__node_search_cluster_member)) { + + node = search_cluster_member_cache(id, uname, NULL); + } + + if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) { + char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); + + node = find_cib_cluster_node(id_str, uname); + free(id_str); + } + + return node; +} + +/*! + * \internal + * \brief Purge a node from cache (both cluster and Pacemaker Remote) + * + * \param[in] node_name If not NULL, purge only nodes with this name + * \param[in] node_id If not 0, purge cluster nodes only if they have this ID + * + * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. + * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote + * nodes that match \p node_name will be purged, and cluster nodes that + * match both \p node_name and \p node_id will be purged. + * \note The caller must be careful not to use \p node_name after calling this + * function if it might be a pointer into a cache entry being removed. 
+ */ +void +pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) +{ + char *node_name_copy = NULL; + + if ((node_name == NULL) && (node_id == 0U)) { + return; + } + + // Purge from Pacemaker Remote node cache + if ((node_name != NULL) + && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { + /* node_name could be a pointer into the cache entry being purged, + * so reassign it to a copy before the original gets freed + */ + node_name_copy = pcmk__str_copy(node_name); + node_name = node_name_copy; + + crm_trace("Purging %s from Pacemaker Remote node cache", node_name); + g_hash_table_remove(crm_remote_peer_cache, node_name); + } + + pcmk__cluster_forget_cluster_node(node_id, node_name); + free(node_name_copy); +} + #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) @@ -704,7 +853,7 @@ remove_conflicting_peer(crm_node_t *node) && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { - if (crm_is_peer_active(existing_node)) { + if (pcmk__cluster_is_node_active(existing_node)) { continue; } @@ -721,32 +870,51 @@ remove_conflicting_peer(crm_node_t *node) #endif /*! - * \brief Get a cluster node cache entry + * \internal + * \brief Get a cluster node cache entry, possibly creating one if not found + * + * If \c pcmk__node_search_cluster_member is set in \p flags, the return value + * is guaranteed not to be \c NULL. A new cache entry is created if one does not + * already exist. 
* * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for + * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * -pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) +pcmk__get_node(unsigned int id, const char *uname, const char *uuid, + uint32_t flags) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); - crm_peer_init(); + pcmk__cluster_init_node_caches(); + + // Check the Pacemaker Remote node cache first + if (pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + if (node != NULL) { + return node; + } + } + + if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) { + return NULL; + } - node = pcmk__search_cluster_node_cache(id, uname, uuid); + node = search_cluster_member_cache(id, uname, uuid); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { - uname_lookup = get_node_name(id); + uname_lookup = pcmk__cluster_node_name(id); } if (uname_lookup) { @@ -755,16 +923,14 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) /* try to turn up the node one more time now that we know the uname. 
*/ if (node == NULL) { - node = pcmk__search_cluster_node_cache(id, uname, uuid); + node = search_cluster_member_cache(id, uname, uuid); } } - if (node == NULL) { char *uniqueid = crm_generate_uuid(); - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node); + node = pcmk__assert_alloc(1, sizeof(crm_node_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); @@ -785,7 +951,7 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) if(node->uuid == NULL) { if (uuid == NULL) { - uuid = crm_peer_uuid(node); + uuid = pcmk__cluster_node_uuid(node); } if (uuid) { @@ -802,21 +968,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) } /*! - * \brief Get a cluster node cache entry - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * - * \return (Possibly newly created) cluster node cache entry - */ -/* coverity[-alloc] Memory is referenced in one or both hashtables */ -crm_node_t * -crm_get_peer(unsigned int id, const char *uname) -{ - return pcmk__get_peer(id, uname, NULL); -} - -/*! 
* \internal * \brief Update a node's uname * @@ -856,7 +1007,9 @@ update_peer_uname(crm_node_t *node, const char *uname) } #if SUPPORT_COROSYNC - if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { + if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) + && !pcmk_is_set(node->flags, crm_remote_node)) { + remove_conflicting_peer(node); } #endif @@ -879,24 +1032,6 @@ proc2text(enum crm_proc_flag proc) case crm_proc_none: text = "none"; break; - case crm_proc_based: - text = "pacemaker-based"; - break; - case crm_proc_controld: - text = "pacemaker-controld"; - break; - case crm_proc_schedulerd: - text = "pacemaker-schedulerd"; - break; - case crm_proc_execd: - text = "pacemaker-execd"; - break; - case crm_proc_attrd: - text = "pacemaker-attrd"; - break; - case crm_proc_fenced: - text = "pacemaker-fenced"; - break; case crm_proc_cpg: text = "corosync-cpg"; break; @@ -942,7 +1077,7 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const changed = TRUE; } - } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) { + } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", @@ -989,7 +1124,7 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const return NULL; } - if (crm_autoreap) { + if (autoreap) { const char *peer_state = NULL; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { @@ -1099,18 +1234,20 @@ update_peer_state_iter(const char *source, crm_node_t *node, const char *state, } free(last); - if (crm_autoreap && !is_member + if (autoreap && !is_member && !pcmk_is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by - * crm_remote_peer_cache_refresh(). + * refresh_remote_nodes(). 
*/ if(iter) { - crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); + crm_notice("Purged 1 peer with " PCMK_XA_ID + "=%u and/or uname=%s from the membership cache", + node->id, node->uname); g_hash_table_iter_remove(iter); } else { - reap_crm_member(node->id, node->uname); + pcmk__cluster_forget_cluster_node(node->id, node->uname); } node = NULL; } @@ -1178,7 +1315,7 @@ pcmk__reap_unseen_nodes(uint64_t membership) } static crm_node_t * -find_known_node(const char *id, const char *uname) +find_cib_cluster_node(const char *id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; @@ -1186,7 +1323,7 @@ find_known_node(const char *id, const char *uname) crm_node_t *by_name = NULL; if (uname) { - g_hash_table_iter_init(&iter, known_node_cache); + g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); @@ -1197,7 +1334,7 @@ find_known_node(const char *id, const char *uname) } if (id) { - g_hash_table_iter_init(&iter, known_node_cache); + g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(strcasecmp(node->uuid, id) == 0) { crm_trace("ID match: %s= %p", id, node); @@ -1256,28 +1393,24 @@ find_known_node(const char *id, const char *uname) } static void -known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) +cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data) { - const char *id = crm_element_value(xml_node, XML_ATTR_ID); - const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME); + const char *id = crm_element_value(xml_node, PCMK_XA_ID); + const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME); crm_node_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); - node = find_known_node(id, uname); + node = 
find_cib_cluster_node(id, uname); if (node == NULL) { char *uniqueid = crm_generate_uuid(); - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node != NULL); + node = pcmk__assert_alloc(1, sizeof(crm_node_t)); - node->uname = strdup(uname); - CRM_ASSERT(node->uname != NULL); + node->uname = pcmk__str_copy(uname); + node->uuid = pcmk__str_copy(id); - node->uuid = strdup(id); - CRM_ASSERT(node->uuid != NULL); - - g_hash_table_replace(known_node_cache, uniqueid, node); + g_hash_table_replace(cluster_node_cib_cache, uniqueid, node); } else if (pcmk_is_set(node->flags, crm_node_dirty)) { pcmk__str_update(&node->uname, uname); @@ -1289,77 +1422,148 @@ known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) } static void -refresh_known_node_cache(xmlNode *cib) +refresh_cluster_node_cib_cache(xmlNode *cib) { - crm_peer_init(); + pcmk__cluster_init_node_caches(); - g_hash_table_foreach(known_node_cache, mark_dirty, NULL); + g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL); crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, - known_node_cache_refresh_helper, NULL); + cluster_node_cib_cache_refresh_helper, NULL); - /* Remove all old cache entries that weren't seen in the CIB */ - g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); + // Remove all old cache entries that weren't seen in the CIB + g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { - crm_remote_peer_cache_refresh(cib); - refresh_known_node_cache(cib); + refresh_remote_nodes(cib); + refresh_cluster_node_cib_cache(cib); +} + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include <crm/cluster/compat.h> + +int +crm_terminate_member(int nodeid, const char *uname, void *unused) +{ + return stonith_api_kick(nodeid, uname, 120, TRUE); +} + +int +crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) +{ + return 
stonith_api_kick(nodeid, uname, 120, TRUE); } -/*! - * \internal - * \brief Search known node cache - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return Known node cache entry if found, otherwise NULL - */ crm_node_t * -pcmk__search_known_node_cache(unsigned int id, const char *uname, - uint32_t flags) +crm_get_peer(unsigned int id, const char *uname) { - crm_node_t *node = NULL; - char *id_str = NULL; + return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member); +} - CRM_ASSERT(id > 0 || uname != NULL); +crm_node_t * +crm_get_peer_full(unsigned int id, const char *uname, int flags) +{ + return pcmk__get_node(id, uname, NULL, flags); +} - node = pcmk__search_node_caches(id, uname, flags); +int +crm_remote_peer_cache_size(void) +{ + unsigned int count = pcmk__cluster_num_remote_nodes(); - if (node || !(flags & CRM_GET_PEER_CLUSTER)) { - return node; - } + return QB_MIN(count, INT_MAX); +} - if (id > 0) { - id_str = crm_strdup_printf("%u", id); - } +void +crm_remote_peer_cache_refresh(xmlNode *cib) +{ + refresh_remote_nodes(cib); +} - node = find_known_node(id_str, uname); +crm_node_t * +crm_remote_peer_get(const char *node_name) +{ + return pcmk__cluster_lookup_remote_node(node_name); +} - free(id_str); - return node; +void +crm_remote_peer_cache_remove(const char *node_name) +{ + pcmk__cluster_forget_remote_node(node_name); } +gboolean +crm_is_peer_active(const crm_node_t * node) +{ + return pcmk__cluster_is_node_active(node); +} -// Deprecated functions kept only for backward API compatibility -// LCOV_EXCL_START +guint +crm_active_peers(void) +{ + return pcmk__cluster_num_active_nodes(); +} -#include <crm/cluster/compat.h> +guint +reap_crm_member(uint32_t id, const char *name) +{ + int matches = 0; + crm_node_t search = { 0, }; -int -crm_terminate_member(int nodeid, const char *uname, void *unused) + if 
(crm_peer_cache == NULL) { + crm_trace("Membership cache not initialized, ignoring purge request"); + return 0; + } + + search.id = id; + search.uname = pcmk__str_copy(name); + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if(matches) { + crm_notice("Purged %d peer%s with " PCMK_XA_ID + "=%u%s%s from the membership cache", + matches, pcmk__plural_s(matches), search.id, + (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + + } else { + crm_info("No peers with " PCMK_XA_ID + "=%u%s%s to purge from the membership cache", + search.id, (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + } + + free(search.uname); + return matches; +} + +void +crm_peer_init(void) { - return stonith_api_kick(nodeid, uname, 120, TRUE); + pcmk__cluster_init_node_caches(); } -int -crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) +void +crm_peer_destroy(void) { - return stonith_api_kick(nodeid, uname, 120, TRUE); + pcmk__cluster_destroy_node_caches(); +} + +void +crm_set_autoreap(gboolean enable) +{ + pcmk__cluster_set_autoreap(enable); +} + +void +crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) +{ + pcmk__cluster_set_status_callback(dispatch); } // LCOV_EXCL_STOP |