diff options
Diffstat (limited to 'lib/cluster')
-rw-r--r-- | lib/cluster/Makefile.am | 6 | ||||
-rw-r--r-- | lib/cluster/cluster.c | 528 | ||||
-rw-r--r-- | lib/cluster/corosync.c | 175 | ||||
-rw-r--r-- | lib/cluster/cpg.c | 472 | ||||
-rw-r--r-- | lib/cluster/crmcluster_private.h | 26 | ||||
-rw-r--r-- | lib/cluster/election.c | 67 | ||||
-rw-r--r-- | lib/cluster/membership.c | 790 | ||||
-rw-r--r-- | lib/cluster/tests/Makefile.am | 12 | ||||
-rw-r--r-- | lib/cluster/tests/cluster/Makefile.am | 18 | ||||
-rw-r--r-- | lib/cluster/tests/cluster/pcmk_cluster_set_destroy_fn_test.c | 79 | ||||
-rw-r--r-- | lib/cluster/tests/cpg/Makefile.am | 19 | ||||
-rw-r--r-- | lib/cluster/tests/cpg/pcmk_cpg_set_confchg_fn_test.c | 98 | ||||
-rw-r--r-- | lib/cluster/tests/cpg/pcmk_cpg_set_deliver_fn_test.c | 94 |
13 files changed, 1593 insertions, 791 deletions
diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am index 2ddbffb..85ba22d 100644 --- a/lib/cluster/Makefile.am +++ b/lib/cluster/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2004-2023 the Pacemaker project contributors +# Copyright 2004-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -8,12 +8,14 @@ # include $(top_srcdir)/mk/common.mk +SUBDIRS = tests + noinst_HEADERS = crmcluster_private.h ## libraries lib_LTLIBRARIES = libcrmcluster.la -libcrmcluster_la_LDFLAGS = -version-info 31:0:2 +libcrmcluster_la_LDFLAGS = -version-info 32:0:3 libcrmcluster_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libcrmcluster_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index f2cd428..d650ca5 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2023 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -10,6 +10,8 @@ #include <crm_internal.h> #include <dlfcn.h> +#include <inttypes.h> // PRIu32 +#include <stdbool.h> #include <stdio.h> #include <unistd.h> #include <string.h> @@ -17,128 +19,182 @@ #include <time.h> #include <sys/param.h> #include <sys/types.h> +#include <sys/utsname.h> // uname() + +#include <glib.h> // gboolean #include <crm/crm.h> -#include <crm/msg_xml.h> #include <crm/common/ipc.h> +#include <crm/common/xml.h> #include <crm/cluster/internal.h> #include "crmcluster_private.h" CRM_TRACE_INIT_DATA(cluster); /*! 
- * \brief Get (and set if needed) a node's UUID + * \internal + * \brief Get the message type equivalent of a string * - * \param[in,out] peer Node to check + * \param[in] text String of message type * - * \return Node UUID of \p peer, or NULL if unknown + * \return Message type equivalent of \p text + */ +enum crm_ais_msg_types +pcmk__cluster_parse_msg_type(const char *text) +{ + CRM_CHECK(text != NULL, return crm_msg_none); + + text = pcmk__message_name(text); + + if (pcmk__str_eq(text, "ais", pcmk__str_none)) { + return crm_msg_ais; + } + if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) { + return crm_msg_cib; + } + if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) { + return crm_msg_crmd; + } + if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_none)) { + return crm_msg_te; + } + if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_none)) { + return crm_msg_pe; + } + if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) { + return crm_msg_lrmd; + } + if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_none)) { + return crm_msg_stonithd; + } + if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) { + return crm_msg_stonith_ng; + } + if (pcmk__str_eq(text, "attrd", pcmk__str_none)) { + return crm_msg_attrd; + } + return crm_msg_none; +} + +/*! 
+ * \internal + * \brief Get a node's cluster-layer UUID, setting it if not already set + * + * \param[in,out] node Node to check + * + * \return Cluster-layer node UUID of \p node, or \c NULL if unknown */ const char * -crm_peer_uuid(crm_node_t *peer) +pcmk__cluster_node_uuid(crm_node_t *node) { - char *uuid = NULL; + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); - // Check simple cases first, to avoid any calls that might block - if (peer == NULL) { + if (node == NULL) { return NULL; } - if (peer->uuid != NULL) { - return peer->uuid; + if (node->uuid != NULL) { + return node->uuid; } - switch (get_cluster_type()) { - case pcmk_cluster_corosync: + switch (cluster_layer) { #if SUPPORT_COROSYNC - uuid = pcmk__corosync_uuid(peer); -#endif - break; + case pcmk_cluster_layer_corosync: + node->uuid = pcmk__corosync_uuid(node); + return node->uuid; +#endif // SUPPORT_COROSYNC - case pcmk_cluster_unknown: - case pcmk_cluster_invalid: - crm_err("Unsupported cluster type"); - break; + default: + crm_err("Unsupported cluster layer %s", + pcmk_cluster_layer_text(cluster_layer)); + return NULL; } - - peer->uuid = uuid; - return peer->uuid; } /*! 
+ * \internal * \brief Connect to the cluster layer * - * \param[in,out] Initialized cluster object to connect + * \param[in,out] cluster Initialized cluster object to connect * - * \return TRUE on success, otherwise FALSE + * \return Standard Pacemaker return code */ -gboolean -crm_cluster_connect(crm_cluster_t *cluster) +int +pcmk_cluster_connect(pcmk_cluster_t *cluster) { - enum cluster_type_e type = get_cluster_type(); + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); + const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); - crm_notice("Connecting to %s cluster infrastructure", - name_for_cluster_type(type)); - switch (type) { - case pcmk_cluster_corosync: + // cts-lab looks for this message + crm_notice("Connecting to %s cluster layer", cluster_layer_s); + + switch (cluster_layer) { #if SUPPORT_COROSYNC - crm_peer_init(); + case pcmk_cluster_layer_corosync: return pcmk__corosync_connect(cluster); -#else - break; #endif // SUPPORT_COROSYNC + default: break; } - return FALSE; + + crm_err("Failed to connect to unsupported cluster layer %s", + cluster_layer_s); + return EPROTONOSUPPORT; } /*! 
* \brief Disconnect from the cluster layer * * \param[in,out] cluster Cluster object to disconnect + * + * \return Standard Pacemaker return code */ -void -crm_cluster_disconnect(crm_cluster_t *cluster) +int +pcmk_cluster_disconnect(pcmk_cluster_t *cluster) { - enum cluster_type_e type = get_cluster_type(); + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); + const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); - crm_info("Disconnecting from %s cluster infrastructure", - name_for_cluster_type(type)); - switch (type) { - case pcmk_cluster_corosync: + crm_info("Disconnecting from %s cluster layer", cluster_layer_s); + + switch (cluster_layer) { #if SUPPORT_COROSYNC - crm_peer_destroy(); + case pcmk_cluster_layer_corosync: pcmk__corosync_disconnect(cluster); + pcmk__cluster_destroy_node_caches(); + return pcmk_rc_ok; #endif // SUPPORT_COROSYNC - break; + default: break; } + + crm_err("Failed to disconnect from unsupported cluster layer %s", + cluster_layer_s); + return EPROTONOSUPPORT; } /*! - * \brief Allocate a new \p crm_cluster_t object + * \brief Allocate a new \p pcmk_cluster_t object * - * \return A newly allocated \p crm_cluster_t object (guaranteed not \p NULL) + * \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL) * \note The caller is responsible for freeing the return value using * \p pcmk_cluster_free(). */ -crm_cluster_t * +pcmk_cluster_t * pcmk_cluster_new(void) { - crm_cluster_t *cluster = calloc(1, sizeof(crm_cluster_t)); - - CRM_ASSERT(cluster != NULL); - return cluster; + return (pcmk_cluster_t *) pcmk__assert_alloc(1, sizeof(pcmk_cluster_t)); } /*! 
- * \brief Free a \p crm_cluster_t object and its dynamically allocated members + * \brief Free a \p pcmk_cluster_t object and its dynamically allocated members * * \param[in,out] cluster Cluster object to free */ void -pcmk_cluster_free(crm_cluster_t *cluster) +pcmk_cluster_free(pcmk_cluster_t *cluster) { if (cluster == NULL) { return; @@ -149,257 +205,339 @@ pcmk_cluster_free(crm_cluster_t *cluster) } /*! + * \brief Set the destroy function for a cluster object + * + * \param[in,out] cluster Cluster object + * \param[in] fn Destroy function to set + * + * \return Standard Pacemaker return code + */ +int +pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer)) +{ + if (cluster == NULL) { + return EINVAL; + } + cluster->destroy = fn; + return pcmk_rc_ok; +} + +/*! + * \internal * \brief Send an XML message via the cluster messaging layer * * \param[in] node Cluster node to send message to * \param[in] service Message type to use in message host info * \param[in] data XML message to send - * \param[in] ordered Ignored for currently supported messaging layers * - * \return TRUE on success, otherwise FALSE + * \return \c true on success, or \c false otherwise */ -gboolean -send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service, - const xmlNode *data, gboolean ordered) +bool +pcmk__cluster_send_message(const crm_node_t *node, + enum crm_ais_msg_types service, const xmlNode *data) { - switch (get_cluster_type()) { - case pcmk_cluster_corosync: + // @TODO Return standard Pacemaker return code + switch (pcmk_get_cluster_layer()) { #if SUPPORT_COROSYNC + case pcmk_cluster_layer_corosync: return pcmk__cpg_send_xml(data, node, service); -#endif - break; +#endif // SUPPORT_COROSYNC + default: break; } - return FALSE; + return false; } /*! 
- * \brief Get the local node's name + * \internal + * \brief Get the node name corresponding to a cluster-layer node ID * - * \return Local node's name - * \note This will fatally exit if local node name cannot be known. - */ -const char * -get_local_node_name(void) -{ - static char *name = NULL; - - if (name == NULL) { - name = get_node_name(0); - } - return name; -} - -/*! - * \brief Get the node name corresponding to a cluster node ID + * Get the node name from the cluster layer if possible. Otherwise, if for the + * local node, call \c uname() and get the \c nodename member from the + * <tt>struct utsname</tt> object. * - * \param[in] nodeid Node ID to check (or 0 for local node) + * \param[in] nodeid Node ID to check (or 0 for the local node) * * \return Node name corresponding to \p nodeid - * \note This will fatally exit if \p nodeid is 0 and local node name cannot be - * known. + * + * \note This will fatally exit if \c uname() fails to get the local node name + * or we run out of memory. + * \note The caller is responsible for freeing the return value using \c free(). 
*/ char * -get_node_name(uint32_t nodeid) +pcmk__cluster_node_name(uint32_t nodeid) { - char *name = NULL; - enum cluster_type_e stack = get_cluster_type(); + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); + const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); - switch (stack) { - case pcmk_cluster_corosync: + switch (cluster_layer) { #if SUPPORT_COROSYNC - name = pcmk__corosync_name(0, nodeid); + case pcmk_cluster_layer_corosync: + return pcmk__corosync_name(0, nodeid); +#else break; #endif // SUPPORT_COROSYNC default: - crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); + crm_err("Unsupported cluster layer: %s", cluster_layer_s); + break; } - if ((name == NULL) && (nodeid == 0)) { - name = pcmk_hostname(); - if (name == NULL) { + if (nodeid == 0) { + struct utsname hostinfo; + + crm_notice("Could not get local node name from %s cluster layer, " + "defaulting to local hostname", + cluster_layer_s); + + if (uname(&hostinfo) < 0) { // @TODO Maybe let the caller decide what to do - crm_err("Could not obtain the local %s node name", - name_for_cluster_type(stack)); + crm_err("Failed to get the local hostname"); crm_exit(CRM_EX_FATAL); } - crm_notice("Defaulting to uname -n for the local %s node name", - name_for_cluster_type(stack)); + return pcmk__str_copy(hostinfo.nodename); } + crm_notice("Could not obtain a node name for node with " + PCMK_XA_ID "=%" PRIu32, + nodeid); + return NULL; +} + +/*! + * \internal + * \brief Get the local node's cluster-layer node name + * + * If getting the node name from the cluster layer is impossible, call + * \c uname() and get the \c nodename member from the <tt>struct utsname</tt> + * object. + * + * \return Local node's name + * + * \note This will fatally exit if \c uname() fails to get the local node name + * or we run out of memory. 
+ */ +const char * +pcmk__cluster_local_node_name(void) +{ + // @TODO Refactor to avoid trivially leaking name at exit + static char *name = NULL; + if (name == NULL) { - crm_notice("Could not obtain a node name for %s node with id %u", - name_for_cluster_type(stack), nodeid); + name = pcmk__cluster_node_name(0); } return name; } /*! - * \brief Get the node name corresponding to a node UUID + * \internal + * \brief Get the node name corresponding to a node UUID * - * \param[in] uuid UUID of desired node + * Look for the UUID in both the remote node cache and the cluster member cache. * - * \return name of desired node + * \param[in] uuid UUID to search for * - * \note This relies on the remote peer cache being populated with all - * remote nodes in the cluster, so callers should maintain that cache. + * \return Node name corresponding to \p uuid if found, or \c NULL otherwise */ const char * -crm_peer_uname(const char *uuid) +pcmk__node_name_from_uuid(const char *uuid) { + /* @TODO There are too many functions in libcrmcluster that look up a node + * from the node caches (possibly creating a cache entry if none exists). + * There are at least the following: + * * pcmk__cluster_lookup_remote_node() + * * pcmk__get_node() + * * pcmk__node_name_from_uuid() + * * pcmk__search_node_caches() + * + * There's a lot of duplication among them, but they all do slightly + * different things. We should try to clean them up and consolidate them to + * the extent possible, likely with new helper functions. 
+ */ GHashTableIter iter; crm_node_t *node = NULL; CRM_CHECK(uuid != NULL, return NULL); - /* remote nodes have the same uname and uuid */ + // Remote nodes have the same uname and uuid if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) { return uuid; } - /* avoid blocking calls where possible */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) { - if (node->uname != NULL) { - return node->uname; - } - break; - } - } - node = NULL; - - if (is_corosync_cluster()) { - long long id; - - if ((pcmk__scan_ll(uuid, &id, 0LL) != pcmk_rc_ok) - || (id < 1LL) || (id > UINT32_MAX)) { - crm_err("Invalid Corosync node ID '%s'", uuid); - return NULL; - } - - node = pcmk__search_cluster_node_cache((uint32_t) id, NULL, NULL); - if (node != NULL) { - crm_info("Setting uuid for node %s[%u] to %s", - node->uname, node->id, uuid); - node->uuid = strdup(uuid); return node->uname; } - return NULL; } - return NULL; } /*! - * \brief Get a log-friendly string equivalent of a cluster type + * \brief Get a log-friendly string equivalent of a cluster layer * - * \param[in] type Cluster type + * \param[in] layer Cluster layer * - * \return Log-friendly string corresponding to \p type + * \return Log-friendly string corresponding to \p layer */ const char * -name_for_cluster_type(enum cluster_type_e type) +pcmk_cluster_layer_text(enum pcmk_cluster_layer layer) { - switch (type) { - case pcmk_cluster_corosync: + switch (layer) { + case pcmk_cluster_layer_corosync: return "corosync"; - case pcmk_cluster_unknown: + case pcmk_cluster_layer_unknown: return "unknown"; - case pcmk_cluster_invalid: + case pcmk_cluster_layer_invalid: + return "invalid"; + default: + crm_err("Invalid cluster layer: %d", layer); return "invalid"; } - crm_err("Invalid cluster type: %d", type); - return "invalid"; } /*! 
- * \brief Get (and validate) the local cluster type + * \brief Get and validate the local cluster layer + * + * If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local + * option, this will try to detect an active cluster from among the supported + * cluster layers. + * + * \return Local cluster layer * - * \return Local cluster type - * \note This will fatally exit if the local cluster type is invalid. + * \note This will fatally exit if the configured cluster layer is invalid. */ -enum cluster_type_e -get_cluster_type(void) +enum pcmk_cluster_layer +pcmk_get_cluster_layer(void) { - bool detected = false; + static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown; const char *cluster = NULL; - static enum cluster_type_e cluster_type = pcmk_cluster_unknown; - /* Return the previous calculation, if any */ - if (cluster_type != pcmk_cluster_unknown) { - return cluster_type; + // Cluster layer is stable once set + if (cluster_layer != pcmk_cluster_layer_unknown) { + return cluster_layer; } cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE); + if (cluster != NULL) { + crm_info("Verifying configured cluster layer '%s'", cluster); + cluster_layer = pcmk_cluster_layer_invalid; + #if SUPPORT_COROSYNC - /* If nothing is defined in the environment, try corosync (if supported) */ - if (cluster == NULL) { - crm_debug("Testing with Corosync"); - cluster_type = pcmk__corosync_detect(); - if (cluster_type != pcmk_cluster_unknown) { - detected = true; - goto done; + if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) { + cluster_layer = pcmk_cluster_layer_corosync; } - } -#endif +#endif // SUPPORT_COROSYNC - /* Something was defined in the environment, test it against what we support */ - crm_info("Verifying cluster type: '%s'", - ((cluster == NULL)? 
"-unspecified-" : cluster)); - if (cluster == NULL) { + if (cluster_layer == pcmk_cluster_layer_invalid) { + crm_notice("This installation does not support the '%s' cluster " + "infrastructure: terminating", + cluster); + crm_exit(CRM_EX_FATAL); + } + crm_info("Assuming an active '%s' cluster", cluster); + } else { + // Nothing configured, so test supported cluster layers #if SUPPORT_COROSYNC - } else if (pcmk__str_eq(cluster, "corosync", pcmk__str_casei)) { - cluster_type = pcmk_cluster_corosync; -#endif + crm_debug("Testing with Corosync"); + if (pcmk__corosync_is_active()) { + cluster_layer = pcmk_cluster_layer_corosync; + } +#endif // SUPPORT_COROSYNC - } else { - cluster_type = pcmk_cluster_invalid; - goto done; /* Keep the compiler happy when no stacks are supported */ + if (cluster_layer == pcmk_cluster_layer_unknown) { + crm_notice("Could not determine the current cluster layer"); + } else { + crm_info("Detected an active '%s' cluster", + pcmk_cluster_layer_text(cluster_layer)); + } } - done: - if (cluster_type == pcmk_cluster_unknown) { - crm_notice("Could not determine the current cluster type"); + return cluster_layer; +} - } else if (cluster_type == pcmk_cluster_invalid) { - crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.", - cluster); - crm_exit(CRM_EX_FATAL); +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START - } else { - crm_info("%s an active '%s' cluster", - (detected? 
"Detected" : "Assuming"), - name_for_cluster_type(cluster_type)); +#include <crm/cluster/compat.h> + +void +set_uuid(xmlNode *xml, const char *attr, crm_node_t *node) +{ + crm_xml_add(xml, attr, pcmk__cluster_node_uuid(node)); +} + +gboolean +crm_cluster_connect(pcmk_cluster_t *cluster) +{ + return pcmk_cluster_connect(cluster) == pcmk_rc_ok; +} + +void +crm_cluster_disconnect(pcmk_cluster_t *cluster) +{ + pcmk_cluster_disconnect(cluster); +} + +const char * +name_for_cluster_type(enum cluster_type_e type) +{ + switch (type) { + case pcmk_cluster_corosync: + return "corosync"; + case pcmk_cluster_unknown: + return "unknown"; + case pcmk_cluster_invalid: + return "invalid"; } + crm_err("Invalid cluster type: %d", type); + return "invalid"; +} - return cluster_type; +enum cluster_type_e +get_cluster_type(void) +{ + return (enum cluster_type_e) pcmk_get_cluster_layer(); } -/*! - * \brief Check whether the local cluster is a Corosync cluster - * - * \return TRUE if the local cluster is a Corosync cluster, otherwise FALSE - */ gboolean is_corosync_cluster(void) { - return get_cluster_type() == pcmk_cluster_corosync; + return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync; } -// Deprecated functions kept only for backward API compatibility -// LCOV_EXCL_START +gboolean +send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service, + const xmlNode *data, gboolean ordered) +{ + return pcmk__cluster_send_message(node, service, data); +} -#include <crm/cluster/compat.h> +const char * +crm_peer_uuid(crm_node_t *peer) +{ + return pcmk__cluster_node_uuid(peer); +} -void -set_uuid(xmlNode *xml, const char *attr, crm_node_t *node) +char * +get_node_name(uint32_t nodeid) +{ + return pcmk__cluster_node_name(nodeid); +} + +const char * +get_local_node_name(void) +{ + return pcmk__cluster_local_node_name(); +} + +const char * +crm_peer_uname(const char *uuid) { - crm_xml_add(xml, attr, crm_peer_uuid(node)); + return pcmk__node_name_from_uuid(uuid); } // 
LCOV_EXCL_STOP diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 08280ce..ff4da60 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -9,32 +9,30 @@ #include <crm_internal.h> -#include <sys/socket.h> -#include <netinet/in.h> #include <arpa/inet.h> +#include <inttypes.h> // PRIu64, PRIx32 #include <netdb.h> -#include <inttypes.h> // PRIu64 - -#include <bzlib.h> - -#include <crm/common/ipc.h> -#include <crm/cluster/internal.h> -#include <crm/common/mainloop.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <sys/socket.h> #include <sys/utsname.h> -#include <qb/qbipcc.h> -#include <qb/qbutil.h> - +#include <bzlib.h> +#include <corosync/cfg.h> +#include <corosync/cmap.h> #include <corosync/corodefs.h> #include <corosync/corotypes.h> #include <corosync/hdb.h> -#include <corosync/cfg.h> -#include <corosync/cmap.h> #include <corosync/quorum.h> +#include <qb/qbipcc.h> +#include <qb/qbutil.h> -#include <crm/msg_xml.h> +#include <crm/cluster/internal.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID +#include <crm/common/mainloop.h> +#include <crm/common/xml.h> -#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */ #include "crmcluster_private.h" static quorum_handle_t pcmk_quorum_handle = 0; @@ -54,7 +52,9 @@ static gboolean (*quorum_app_callback)(unsigned long long seq, char * pcmk__corosync_uuid(const crm_node_t *node) { - if ((node != NULL) && is_corosync_cluster()) { + CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync); + + if (node != NULL) { if (node->id > 0) { return crm_strdup_printf("%u", node->id); } else { @@ -114,7 +114,7 @@ pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) int rv; if (nodeid == 0) { - nodeid = 
get_local_nodeid(0); + nodeid = pcmk__cpg_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { @@ -217,13 +217,14 @@ bail: * \internal * \brief Disconnect from Corosync cluster * - * \param[in,out] cluster Cluster connection to disconnect + * \param[in,out] cluster Cluster object to disconnect */ void -pcmk__corosync_disconnect(crm_cluster_t *cluster) +pcmk__corosync_disconnect(pcmk_cluster_t *cluster) { - cluster_disconnect_cpg(cluster); - if (pcmk_quorum_handle) { + pcmk__cpg_disconnect(cluster); + + if (pcmk_quorum_handle != 0) { quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } @@ -309,12 +310,13 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ - node = crm_get_peer(id, NULL); + node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); if (node->uname == NULL) { char *name = pcmk__corosync_name(0, id); crm_info("Obtaining name for new node %u", id); - node = crm_get_peer(id, name); + node = pcmk__get_node(id, name, NULL, + pcmk__node_search_cluster_member); free(name); } @@ -445,106 +447,101 @@ pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, * \internal * \brief Connect to Corosync cluster layer * - * \param[in,out] cluster Initialized cluster object to connect + * \param[in,out] cluster Initialized cluster object to connect + * + * \return Standard Pacemaker return code */ -gboolean -pcmk__corosync_connect(crm_cluster_t *cluster) +int +pcmk__corosync_connect(pcmk_cluster_t *cluster) { crm_node_t *peer = NULL; - enum cluster_type_e stack = get_cluster_type(); + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); + const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); + int rc = pcmk_rc_ok; - crm_peer_init(); + pcmk__cluster_init_node_caches(); - if (stack != pcmk_cluster_corosync) { - crm_err("Invalid cluster type: %s " CRM_XS " stack=%d", 
- name_for_cluster_type(stack), stack); - return FALSE; + if (cluster_layer != pcmk_cluster_layer_corosync) { + crm_err("Invalid cluster layer: %s " CRM_XS " cluster_layer=%d", + cluster_layer_s, cluster_layer); + return EINVAL; } - if (!cluster_connect_cpg(cluster)) { - // Error message was logged by cluster_connect_cpg() - return FALSE; + rc = pcmk__cpg_connect(cluster); + if (rc != pcmk_rc_ok) { + // Error message was logged by pcmk__cpg_connect() + return rc; } - crm_info("Connection to %s established", name_for_cluster_type(stack)); + crm_info("Connection to %s established", cluster_layer_s); - cluster->nodeid = get_local_nodeid(0); + cluster->nodeid = pcmk__cpg_local_nodeid(0); if (cluster->nodeid == 0) { crm_err("Could not determine local node ID"); - return FALSE; + return ENXIO; } - cluster->uname = get_node_name(0); + cluster->uname = pcmk__cluster_node_name(0); if (cluster->uname == NULL) { crm_err("Could not determine local node name"); - return FALSE; + return ENXIO; } // Ensure local node always exists in peer cache - peer = crm_get_peer(cluster->nodeid, cluster->uname); + peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, + pcmk__node_search_cluster_member); cluster->uuid = pcmk__corosync_uuid(peer); - return TRUE; + return pcmk_rc_ok; } /*! * \internal * \brief Check whether a Corosync cluster is active * - * \return pcmk_cluster_corosync if Corosync is found, else pcmk_cluster_unknown + * \return \c true if Corosync is found active, or \c false otherwise */ -enum cluster_type_e -pcmk__corosync_detect(void) +bool +pcmk__corosync_is_active(void) { - int rc = CS_OK; cmap_handle_t handle; + int rc = pcmk__init_cmap(&handle); - rc = pcmk__init_cmap(&handle); - - switch(rc) { - case CS_OK: - break; - case CS_ERR_SECURITY: - crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc); - /* It's there, we just can't talk to it. 
- * Good enough for us to identify as 'corosync' - */ - return pcmk_cluster_corosync; - - default: - crm_info("Failed to initialize the cmap API: %s (%d)", - pcmk__cs_err_str(rc), rc); - return pcmk_cluster_unknown; + if (rc == CS_OK) { + cmap_finalize(handle); + return true; } - cmap_finalize(handle); - return pcmk_cluster_corosync; + crm_info("Failed to initialize the cmap API: %s (%d)", + pcmk__cs_err_str(rc), rc); + return false; } /*! + * \internal * \brief Check whether a Corosync cluster peer is active * * \param[in] node Node to check * - * \return TRUE if \p node is an active Corosync peer, otherwise FALSE + * \return \c true if \p node is an active Corosync peer, or \c false otherwise */ -gboolean -crm_is_corosync_peer_active(const crm_node_t *node) +bool +pcmk__corosync_is_peer_active(const crm_node_t *node) { if (node == NULL) { crm_trace("Corosync peer inactive: NULL"); - return FALSE; - - } else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { + return false; + } + if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) { crm_trace("Corosync peer %s inactive: state=%s", node->uname, node->state); - return FALSE; - - } else if (!pcmk_is_set(node->processes, crm_proc_cpg)) { - crm_trace("Corosync peer %s inactive: processes=%.16x", + return false; + } + if (!pcmk_is_set(node->processes, crm_proc_cpg)) { + crm_trace("Corosync peer %s inactive " CRM_XS " processes=%.16" PRIx32, node->uname, node->processes); - return FALSE; + return false; } - return TRUE; + return true; } /*! 
@@ -606,7 +603,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) goto bail; } - crm_peer_init(); + pcmk__cluster_init_node_caches(); crm_trace("Initializing Corosync node list"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; @@ -640,17 +637,17 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); - crm_get_peer(nodeid, name); + pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member); } if (nodeid > 0 && name != NULL) { any = true; if (xml_parent) { - xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); + xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE); crm_xml_set_id(node, "%u", nodeid); - crm_xml_add(node, XML_ATTR_UNAME, name); + crm_xml_add(node, PCMK_XA_UNAME, name); } } @@ -812,3 +809,17 @@ bail: cmap_finalize(cmap_handle); return result; } + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include <crm/cluster/compat.h> + +gboolean +crm_is_corosync_peer_active(const crm_node_t *node) +{ + return pcmk__corosync_is_peer_active(node); +} + +// LCOV_EXCL_STOP +// End deprecated API diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index d1decc6..62d39a6 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2023 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -8,37 +8,40 @@ */ #include <crm_internal.h> -#include <bzlib.h> -#include <sys/socket.h> -#include <netinet/in.h> + #include <arpa/inet.h> +#include <inttypes.h> // PRIu32 #include <netdb.h> - -#include <crm/common/ipc.h> -#include <crm/cluster/internal.h> -#include <crm/common/mainloop.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <stdint.h> // uint32_t +#include <sys/socket.h> +#include <sys/types.h> // size_t #include <sys/utsname.h> -#include <qb/qbipc_common.h> -#include <qb/qbipcc.h> -#include <qb/qbutil.h> - +#include <bzlib.h> #include <corosync/corodefs.h> #include <corosync/corotypes.h> #include <corosync/hdb.h> #include <corosync/cpg.h> +#include <qb/qbipc_common.h> +#include <qb/qbipcc.h> +#include <qb/qbutil.h> -#include <crm/msg_xml.h> +#include <crm/cluster/internal.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID +#include <crm/common/mainloop.h> +#include <crm/common/xml.h> -#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */ #include "crmcluster_private.h" -/* @TODO Once we can update the public API to require crm_cluster_t* in more +/* @TODO Once we can update the public API to require pcmk_cluster_t* in more * functions, we can ditch this in favor of cluster->cpg_handle. */ static cpg_handle_t pcmk_cpg_handle = 0; -// @TODO These could be moved to crm_cluster_t* at that time as well +// @TODO These could be moved to pcmk_cluster_t* at that time as well static bool cpg_evicted = false; static GList *cs_message_queue = NULL; static int cs_message_timer = 0; @@ -87,26 +90,7 @@ static void crm_cs_flush(gpointer data); } while (counter < max) /*! 
- * \brief Disconnect from Corosync CPG - * - * \param[in,out] cluster Cluster to disconnect - */ -void -cluster_disconnect_cpg(crm_cluster_t *cluster) -{ - pcmk_cpg_handle = 0; - if (cluster->cpg_handle) { - crm_trace("Disconnecting CPG"); - cpg_leave(cluster->cpg_handle, &cluster->group); - cpg_finalize(cluster->cpg_handle); - cluster->cpg_handle = 0; - - } else { - crm_info("No CPG connection"); - } -} - -/*! + * \internal * \brief Get the local Corosync node ID (via CPG) * * \param[in] handle CPG connection to use (or 0 to use new connection) @@ -114,7 +98,7 @@ cluster_disconnect_cpg(crm_cluster_t *cluster) * \return Corosync ID of local node (or 0 if not known) */ uint32_t -get_local_nodeid(cpg_handle_t handle) +pcmk__cpg_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; @@ -125,15 +109,18 @@ get_local_nodeid(cpg_handle_t handle) uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; - int rv; + int rv = 0; - if(local_nodeid != 0) { + if (local_nodeid != 0) { return local_nodeid; } - if(handle == 0) { + if (handle == 0) { crm_trace("Creating connection"); - cs_repeat(rc, retries, 5, cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL)); + cs_repeat(rc, retries, 5, + cpg_model_initialize(&local_handle, CPG_MODEL_V1, + (cpg_model_data_t *) &cpg_model_info, + NULL)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); @@ -147,14 +134,16 @@ get_local_nodeid(cpg_handle_t handle) goto bail; } - /* CPG provider run as root (in given user namespace, anyway)? */ - if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, - &found_uid, &found_gid))) { + // CPG provider run as root (at least in given user namespace)? 
+ rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, + &found_uid, &found_gid); + if (rv == 0) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; + } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); @@ -174,7 +163,7 @@ get_local_nodeid(cpg_handle_t handle) } bail: - if(handle == 0) { + if (handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } @@ -279,7 +268,7 @@ static int pcmk_cpg_dispatch(gpointer user_data) { cs_error_t rc = CS_OK; - crm_cluster_t *cluster = (crm_cluster_t *) user_data; + pcmk_cluster_t *cluster = (pcmk_cluster_t *) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { @@ -422,59 +411,64 @@ check_message_sanity(const pcmk__cpg_msg_t *msg) } /*! + * \internal * \brief Extract text data from a Corosync CPG message * - * \param[in] handle CPG connection (to get local node ID if not known) - * \param[in] nodeid Corosync ID of node that sent message - * \param[in] pid Process ID of message sender (for logging only) - * \param[in,out] content CPG message - * \param[out] kind If not NULL, will be set to CPG header ID - * (which should be an enum crm_ais_msg_class value, - * currently always crm_class_cluster) - * \param[out] from If not NULL, will be set to sender uname - * (valid for the lifetime of \p content) + * \param[in] handle CPG connection (to get local node ID if not known) + * \param[in] sender_id Corosync ID of node that sent message + * \param[in] pid Process ID of message sender (for logging only) + * \param[in,out] content CPG message + * \param[out] kind If not \c NULL, will be set to CPG header ID + * (which should be an <tt>enum crm_ais_msg_class</tt> + * value, currently always \c crm_class_cluster) + * \param[out] from If not \c NULL, will be set to sender uname + * 
(valid for the lifetime of \p content) * * \return Newly allocated string with message data - * \note It is the caller's responsibility to free the return value with free(). + * + * \note The caller is responsible for freeing the return value using \c free(). */ char * -pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, - uint32_t *kind, const char **from) +pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, uint32_t pid, + void *content, uint32_t *kind, const char **from) { char *data = NULL; - pcmk__cpg_msg_t *msg = (pcmk__cpg_msg_t *) content; + pcmk__cpg_msg_t *msg = content; - if(handle) { + if (handle != 0) { // Do filtering and field massaging - uint32_t local_nodeid = get_local_nodeid(handle); - const char *local_name = get_local_node_name(); + uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle); + const char *local_name = pcmk__cluster_local_node_name(); - if (msg->sender.id > 0 && msg->sender.id != nodeid) { - crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); + if ((msg->sender.id != 0) && (msg->sender.id != sender_id)) { + crm_err("Nodeid mismatch from %" PRIu32 ".%" PRIu32 + ": claimed nodeid=%" PRIu32, + sender_id, pid, msg->sender.id); return NULL; - - } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { - /* Not for us */ - crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); + } + if ((msg->host.id != 0) && (local_nodeid != msg->host.id)) { + crm_trace("Not for us: %" PRIu32" != %" PRIu32, + msg->host.id, local_nodeid); return NULL; - } else if (msg->host.size != 0 && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { - /* Not for us */ + } + if ((msg->host.size > 0) + && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { + crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } - msg->sender.id = nodeid; + msg->sender.id = sender_id; if (msg->sender.size == 0) { - crm_node_t *peer = 
crm_get_peer(nodeid, NULL); - - if (peer == NULL) { - crm_err("Peer with nodeid=%u is unknown", nodeid); + const crm_node_t *peer = + pcmk__get_node(sender_id, NULL, NULL, + pcmk__node_search_cluster_member); - } else if (peer->uname == NULL) { - crm_err("No uname for peer with nodeid=%u", nodeid); + if (peer->uname == NULL) { + crm_err("No uname for peer with nodeid=%u", sender_id); } else { - crm_notice("Fixing uname for peer with nodeid=%u", nodeid); + crm_notice("Fixing uname for peer with nodeid=%u", sender_id); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); @@ -493,7 +487,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *from = msg->sender.uname; } - if (msg->is_compressed && msg->size > 0) { + if (msg->is_compressed && (msg->size > 0)) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; @@ -503,13 +497,15 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void } crm_trace("Decompressing message data"); - uncompressed = calloc(1, new_size); - rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); + uncompressed = pcmk__assert_alloc(1, new_size); + rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, + msg->compressed_size, 1, 0); rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { - crm_err("Decompression failed: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); + crm_err("Decompression failed: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); free(uncompressed); goto badmsg; } @@ -526,7 +522,8 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void } // Is this necessary? 
- crm_get_peer(msg->sender.id, msg->sender.uname); + pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, + pcmk__node_search_cluster_member); crm_trace("Payload: %.200s", data); return data; @@ -627,8 +624,9 @@ node_left(const char *cpg_group_name, int event_counter, const struct cpg_address **sorted_member_list, size_t member_list_entries) { - crm_node_t *peer = pcmk__search_cluster_node_cache(cpg_peer->nodeid, - NULL, NULL); + crm_node_t *peer = + pcmk__search_node_caches(cpg_peer->nodeid, NULL, + pcmk__node_search_cluster_member); const struct cpg_address **rival = NULL; /* Most CPG-related Pacemaker code assumes that only one process on a node @@ -656,7 +654,7 @@ node_left(const char *cpg_group_name, int event_counter, cpgreason2str(cpg_peer->reason)); if (peer != NULL) { crm_update_peer_proc(__func__, peer, crm_proc_cpg, - OFFLINESTATUS); + PCMK_VALUE_OFFLINE); } } else if (cpg_peer->nodeid == local_nodeid) { crm_warn("Group %s event %d: duplicate local pid %u left%s", @@ -672,72 +670,81 @@ node_left(const char *cpg_group_name, int event_counter, } /*! + * \internal * \brief Handle a CPG configuration change event * * \param[in] handle CPG connection - * \param[in] cpg_name CPG group name + * \param[in] group_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list + * + * \note This is of type \c cpg_confchg_fn_t, intended to be used in a + * \c cpg_callbacks_t object. 
*/ void -pcmk_cpg_membership(cpg_handle_t handle, - const struct cpg_name *groupName, - const struct cpg_address *member_list, size_t member_list_entries, - const struct cpg_address *left_list, size_t left_list_entries, - const struct cpg_address *joined_list, size_t joined_list_entries) +pcmk__cpg_confchg_cb(cpg_handle_t handle, + const struct cpg_name *group_name, + const struct cpg_address *member_list, + size_t member_list_entries, + const struct cpg_address *left_list, + size_t left_list_entries, + const struct cpg_address *joined_list, + size_t joined_list_entries) { - int i; - gboolean found = FALSE; static int counter = 0; - uint32_t local_nodeid = get_local_nodeid(handle); - const struct cpg_address **sorted; - sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); - CRM_ASSERT(sorted != NULL); + bool found = false; + uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle); + const struct cpg_address **sorted = NULL; + + sorted = pcmk__assert_alloc(member_list_entries, + sizeof(const struct cpg_address *)); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } - /* so that the cross-matching multiply-subscribed nodes is then cheap */ + + // So that the cross-matching of multiply-subscribed nodes is then cheap qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); - for (i = 0; i < left_list_entries; i++) { - node_left(groupName->value, counter, local_nodeid, &left_list[i], + for (int i = 0; i < left_list_entries; i++) { + node_left(group_name->value, counter, local_nodeid, &left_list[i], sorted, member_list_entries); } free(sorted); sorted = NULL; - for (i = 0; i < joined_list_entries; i++) { + for (int i = 0; i < joined_list_entries; i++) { crm_info("Group %s event %d: node %u pid %u joined%s", - groupName->value, counter, joined_list[i].nodeid, + group_name->value, counter, joined_list[i].nodeid, joined_list[i].pid, 
cpgreason2str(joined_list[i].reason)); } - for (i = 0; i < member_list_entries; i++) { - crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); + for (int i = 0; i < member_list_entries; i++) { + crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, + pcmk__node_search_cluster_member); if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { // See the note in node_left() crm_warn("Group %s event %d: detected duplicate local pid %u", - groupName->value, counter, member_list[i].pid); + group_name->value, counter, member_list[i].pid); continue; } crm_info("Group %s event %d: %s (node %u pid %u) is member", - groupName->value, counter, peer_name(peer), + group_name->value, counter, peer_name(peer), member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg, - ONLINESTATUS); + PCMK_VALUE_ONLINE); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a @@ -755,19 +762,20 @@ pcmk_cpg_membership(cpg_handle_t handle, } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state - crm_warn("Node %u is member of group %s but was believed offline", - member_list[i].nodeid, groupName->value); + crm_warn("Node %u is member of group %s but was believed " + "offline", + member_list[i].nodeid, group_name->value); pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { - found = TRUE; + found = true; } } if (!found) { - crm_err("Local node was evicted from group %s", groupName->value); + crm_err("Local node was evicted from group %s", group_name->value); cpg_evicted = true; } @@ -775,14 +783,50 @@ pcmk_cpg_membership(cpg_handle_t handle, } /*! 
- * \brief Connect to Corosync CPG + * \brief Set the CPG deliver callback function for a cluster object * * \param[in,out] cluster Cluster object + * \param[in] fn Deliver callback function to set * - * \return TRUE on success, otherwise FALSE + * \return Standard Pacemaker return code */ -gboolean -cluster_connect_cpg(crm_cluster_t *cluster) +int +pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn) +{ + if (cluster == NULL) { + return EINVAL; + } + cluster->cpg.cpg_deliver_fn = fn; + return pcmk_rc_ok; +} + +/*! + * \brief Set the CPG config change callback function for a cluster object + * + * \param[in,out] cluster Cluster object + * \param[in] fn Configuration change callback function to set + * + * \return Standard Pacemaker return code + */ +int +pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn) +{ + if (cluster == NULL) { + return EINVAL; + } + cluster->cpg.cpg_confchg_fn = fn; + return pcmk_rc_ok; +} + +/*! + * \brief Connect to Corosync CPG + * + * \param[in,out] cluster Initialized cluster object to connect + * + * \return Standard Pacemaker return code + */ +int +pcmk__cpg_connect(pcmk_cluster_t *cluster) { cs_error_t rc; int fd = -1; @@ -848,7 +892,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) goto bail; } - id = get_local_nodeid(handle); + id = pcmk__cpg_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; @@ -870,54 +914,52 @@ cluster_connect_cpg(crm_cluster_t *cluster) bail: if (rc != CS_OK) { cpg_finalize(handle); - return FALSE; + // @TODO Map rc to more specific Pacemaker return code + return ENOTCONN; } - peer = crm_get_peer(id, NULL); - crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); - return TRUE; + peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); + return pcmk_rc_ok; } /*! 
* \internal - * \brief Send an XML message via Corosync CPG - * - * \param[in] msg XML message to send - * \param[in] node Cluster node to send message to - * \param[in] dest Type of message to send + * \brief Disconnect from Corosync CPG * - * \return TRUE on success, otherwise FALSE + * \param[in,out] cluster Cluster object to disconnect */ -bool -pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node, - enum crm_ais_msg_types dest) +void +pcmk__cpg_disconnect(pcmk_cluster_t *cluster) { - bool rc = true; - char *data = NULL; + pcmk_cpg_handle = 0; + if (cluster->cpg_handle != 0) { + crm_trace("Disconnecting CPG"); + cpg_leave(cluster->cpg_handle, &cluster->group); + cpg_finalize(cluster->cpg_handle); + cluster->cpg_handle = 0; - data = dump_xml_unformatted(msg); - rc = send_cluster_text(crm_class_cluster, data, FALSE, node, dest); - free(data); - return rc; + } else { + crm_info("No CPG connection"); + } } /*! * \internal * \brief Send string data via Corosync CPG * - * \param[in] msg_class Message class (to set as CPG header ID) - * \param[in] data Data to send - * \param[in] local What to set as host "local" value (which is never used) - * \param[in] node Cluster node to send message to - * \param[in] dest Type of message to send + * \param[in] data Data to send + * \param[in] local What to set as host "local" value (which is never used) + * \param[in] node Cluster node to send message to + * \param[in] dest Type of message to send * - * \return TRUE on success, otherwise FALSE + * \return \c true on success, or \c false otherwise */ -gboolean -send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, - gboolean local, const crm_node_t *node, - enum crm_ais_msg_types dest) +static bool +send_cpg_text(const char *data, bool local, const crm_node_t *node, + enum crm_ais_msg_types dest) { + // @COMPAT Drop local argument when send_cluster_text is dropped static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; @@ -926,20 
+968,11 @@ send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, char *target = NULL; struct iovec *iov; pcmk__cpg_msg_t *msg = NULL; - enum crm_ais_msg_types sender = text2msg_type(crm_system_name); - switch (msg_class) { - case crm_class_cluster: - break; - default: - crm_err("Invalid message class: %d", msg_class); - return FALSE; - } - - CRM_CHECK(dest != crm_msg_ais, return FALSE); + CRM_CHECK(dest != crm_msg_ais, return false); if (local_name == NULL) { - local_name = get_local_node_name(); + local_name = pcmk__cluster_local_node_name(); } if ((local_name_len == 0) && (local_name != NULL)) { local_name_len = strlen(local_name); @@ -953,39 +986,38 @@ send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, local_pid = getpid(); } - if (sender == crm_msg_none) { - sender = local_pid; - } - - msg = calloc(1, sizeof(pcmk__cpg_msg_t)); + msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t)); msg_id++; msg->id = msg_id; - msg->header.id = msg_class; + msg->header.id = crm_class_cluster; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; - if (node) { - if (node->uname) { - target = strdup(node->uname); + if (node != NULL) { + if (node->uname != NULL) { + target = pcmk__str_copy(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); + } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; + } else { - target = strdup("all"); + target = pcmk__str_copy("all"); } msg->sender.id = 0; - msg->sender.type = sender; + msg->sender.type = pcmk__cluster_parse_msg_type(crm_system_name); msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); + if ((local_name != NULL) && (msg->sender.size != 0)) { memcpy(msg->sender.uname, local_name, msg->sender.size); } @@ -1000,10 +1032,9 @@ send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, } else { char 
*compressed = NULL; unsigned int new_size = 0; - char *uncompressed = strdup(data); - if (pcmk__compress(uncompressed, (unsigned int) msg->size, 0, - &compressed, &new_size) == pcmk_rc_ok) { + if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed, + &new_size) == pcmk_rc_ok) { msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size; msg = pcmk__realloc(msg, msg->header.size); @@ -1019,38 +1050,116 @@ send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, memcpy(msg->data, data, msg->size); } - free(uncompressed); free(compressed); } - iov = calloc(1, sizeof(struct iovec)); + iov = pcmk__assert_alloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; - if (msg->compressed_size) { - crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", + if (msg->compressed_size > 0) { + crm_trace("Queueing CPG message %u to %s " + "(%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { - crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", + crm_trace("Queueing CPG message %u to %s " + "(%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } + free(target); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); - return TRUE; + return true; } /*! 
- * \brief Get the message type equivalent of a string + * \internal + * \brief Send an XML message via Corosync CPG * - * \param[in] text String of message type + * \param[in] msg XML message to send + * \param[in] node Cluster node to send message to + * \param[in] dest Type of message to send * - * \return Message type equivalent of \p text + * \return TRUE on success, otherwise FALSE */ +bool +pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node, + enum crm_ais_msg_types dest) +{ + bool rc = true; + GString *data = g_string_sized_new(1024); + + pcmk__xml_string(msg, 0, data, 0); + + rc = send_cpg_text(data->str, false, node, dest); + g_string_free(data, TRUE); + return rc; +} + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include <crm/cluster/compat.h> + +gboolean +cluster_connect_cpg(pcmk_cluster_t *cluster) +{ + return pcmk__cpg_connect(cluster) == pcmk_rc_ok; +} + +void +cluster_disconnect_cpg(pcmk_cluster_t *cluster) +{ + pcmk__cpg_disconnect(cluster); +} + +uint32_t +get_local_nodeid(cpg_handle_t handle) +{ + return pcmk__cpg_local_nodeid(handle); +} + +void +pcmk_cpg_membership(cpg_handle_t handle, + const struct cpg_name *group_name, + const struct cpg_address *member_list, + size_t member_list_entries, + const struct cpg_address *left_list, + size_t left_list_entries, + const struct cpg_address *joined_list, + size_t joined_list_entries) +{ + pcmk__cpg_confchg_cb(handle, group_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); +} + +gboolean +send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, + gboolean local, const crm_node_t *node, + enum crm_ais_msg_types dest) +{ + switch (msg_class) { + case crm_class_cluster: + return send_cpg_text(data, local, node, dest); + default: + crm_err("Invalid message class: %d", msg_class); + return FALSE; + } +} + +char * +pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, 
uint32_t pid, + void *content, uint32_t *kind, const char **from) +{ + return pcmk__cpg_message_data(handle, nodeid, pid, content, kind, from); +} + enum crm_ais_msg_types text2msg_type(const char *text) { @@ -1090,3 +1199,6 @@ text2msg_type(const char *text) } return type; } + +// LCOV_EXCL_STOP +// End deprecated API diff --git a/lib/cluster/crmcluster_private.h b/lib/cluster/crmcluster_private.h index 370bca5..ef1d54f 100644 --- a/lib/cluster/crmcluster_private.h +++ b/lib/cluster/crmcluster_private.h @@ -1,5 +1,5 @@ /* - * Copyright 2020-2023 the Pacemaker project contributors + * Copyright 2020-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -19,10 +19,14 @@ #include <glib.h> // G_GNUC_INTERNAL, gboolean #include <libxml/tree.h> // xmlNode -#include <crm/cluster.h> // cluster_type_e, crm_node_t +#if SUPPORT_COROSYNC +#include <corosync/cpg.h> // cpg_handle_t +#endif // SUPPORT_COROSYNC + +#include <crm/cluster.h> // crm_node_t G_GNUC_INTERNAL -enum cluster_type_e pcmk__corosync_detect(void); +bool pcmk__corosync_is_active(void); G_GNUC_INTERNAL bool pcmk__corosync_has_nodelist(void); @@ -35,10 +39,22 @@ char *pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); G_GNUC_INTERNAL -gboolean pcmk__corosync_connect(crm_cluster_t *cluster); +int pcmk__corosync_connect(pcmk_cluster_t *cluster); + +G_GNUC_INTERNAL +void pcmk__corosync_disconnect(pcmk_cluster_t *cluster); + +G_GNUC_INTERNAL +bool pcmk__corosync_is_peer_active(const crm_node_t *node); + +G_GNUC_INTERNAL +int pcmk__cpg_connect(pcmk_cluster_t *cluster); + +G_GNUC_INTERNAL +void pcmk__cpg_disconnect(pcmk_cluster_t *cluster); G_GNUC_INTERNAL -void pcmk__corosync_disconnect(crm_cluster_t *cluster); +uint32_t pcmk__cpg_local_nodeid(cpg_handle_t handle); G_GNUC_INTERNAL bool pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node, diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 
ebbae72..a3b4df0 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -12,7 +12,6 @@ #include <sys/time.h> #include <sys/resource.h> -#include <crm/msg_xml.h> #include <crm/common/xml.h> #include <crm/common/mainloop.h> @@ -298,8 +297,9 @@ election_vote(election_t *e) return; } - our_node = crm_get_peer(0, e->uname); - if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + our_node = pcmk__get_node(0, e->uname, NULL, + pcmk__node_search_cluster_member); + if (!pcmk__cluster_is_node_active(our_node)) { crm_trace("Cannot vote in %s yet: local node not connected to cluster", e->name); return; @@ -310,13 +310,15 @@ election_vote(election_t *e) vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); e->count++; - crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid); - crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count); + crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid); + crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count); + // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds get_uptime(&age); - crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age); + crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC, + PCMK__XA_ELECTION_AGE_NANO_SEC, &age); - send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); + pcmk__cluster_send_message(NULL, crm_msg_crmd, vote); free_xml(vote); crm_debug("Started %s round %d", e->name, e->count); @@ -355,7 +357,7 @@ election_check(election_t *e) } voted_size = g_hash_table_size(e->voted); - num_members = crm_active_peers(); + num_members = pcmk__cluster_num_active_nodes(); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to @@ -372,7 +374,7 @@ 
election_check(election_t *e) crm_warn("Received too many votes in %s", e->name); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { - if (crm_is_peer_active(node)) { + if (pcmk__cluster_is_node_active(node)) { crm_warn("* expected vote: %s", node->uname); } } @@ -428,12 +430,12 @@ parse_election_message(const election_t *e, const xmlNode *message, vote->age.tv_sec = -1; vote->age.tv_usec = -1; - vote->op = crm_element_value(message, F_CRM_TASK); - vote->from = crm_element_value(message, F_CRM_HOST_FROM); - vote->version = crm_element_value(message, F_CRM_VERSION); - vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER); + vote->op = crm_element_value(message, PCMK__XA_CRM_TASK); + vote->from = crm_element_value(message, PCMK__XA_SRC); + vote->version = crm_element_value(message, PCMK_XA_VERSION); + vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER); - crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id)); + crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id)); if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL) || (vote->election_owner == NULL) || (vote->election_id < 0)) { @@ -448,9 +450,11 @@ parse_election_message(const election_t *e, const xmlNode *message, // Op-specific validation if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) { - // Only vote ops have uptime - crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S, - F_CRM_ELECTION_AGE_US, &(vote->age)); + /* Only vote ops have uptime. + Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds. + */ + crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC, + PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age)); if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) { crm_warn("Cannot count %s %s from %s because it is missing uptime", (e? 
e->name : "election"), vote->op, vote->from); @@ -485,19 +489,12 @@ parse_election_message(const election_t *e, const xmlNode *message, static void record_vote(election_t *e, struct vote *vote) { - char *voter_copy = NULL; - char *vote_copy = NULL; - CRM_ASSERT(e && vote && vote->from && vote->op); + if (e->voted == NULL) { e->voted = pcmk__strkey_table(free, free); } - - voter_copy = strdup(vote->from); - vote_copy = strdup(vote->op); - CRM_ASSERT(voter_copy && vote_copy); - - g_hash_table_replace(e->voted, voter_copy, vote_copy); + pcmk__insert_dup(e->voted, vote->from, vote->op); } static void @@ -508,10 +505,10 @@ send_no_vote(crm_node_t *peer, struct vote *vote) xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); - crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner); - crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id); + crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner); + crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id); - send_cluster_message(peer, crm_msg_crmd, novote, TRUE); + pcmk__cluster_send_message(peer, crm_msg_crmd, novote); free_xml(novote); } @@ -547,8 +544,10 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) return election_error; } - your_node = crm_get_peer(0, vote.from); - our_node = crm_get_peer(0, e->uname); + your_node = pcmk__get_node(0, vote.from, NULL, + pcmk__node_search_cluster_member); + our_node = pcmk__get_node(0, e->uname, NULL, + pcmk__node_search_cluster_member); we_are_owner = (our_node != NULL) && pcmk__str_eq(our_node->uuid, vote.election_owner, pcmk__str_none); @@ -557,7 +556,7 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) reason = "Not eligible"; we_lose = TRUE; - } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { + } else if (!pcmk__cluster_is_node_active(our_node)) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_lose 
= TRUE; @@ -567,7 +566,7 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) reason = "Superseded"; done = TRUE; - } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { + } else if (!pcmk__cluster_is_node_active(your_node)) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index f856cca..7eedc2e 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2023 the Pacemaker project contributors + * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -13,6 +13,7 @@ # define _GNU_SOURCE #endif +#include <inttypes.h> // PRIu32 #include <sys/param.h> #include <sys/types.h> #include <stdio.h> @@ -22,7 +23,7 @@ #include <crm/common/ipc.h> #include <crm/common/xml_internal.h> #include <crm/cluster/internal.h> -#include <crm/msg_xml.h> +#include <crm/common/xml.h> #include <crm/stonith-ng.h> #include "crmcluster_private.h" @@ -32,6 +33,9 @@ * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. + * + * @COMPAT When this is internal, rename to cluster_node_member_cache and make + * static. */ GHashTable *crm_peer_cache = NULL; @@ -47,22 +51,22 @@ GHashTable *crm_peer_cache = NULL; * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: - * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, - * while crm_remote_peer_cache_refresh() populates it via the CIB. + * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node() + * directly manage it, while refresh_remote_nodes() populates it via the CIB. 
*/ GHashTable *crm_remote_peer_cache = NULL; /* - * The known node cache tracks cluster and remote nodes that have been seen in + * The CIB cluster node cache tracks cluster nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ -static GHashTable *known_node_cache = NULL; +static GHashTable *cluster_node_cib_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; -static gboolean crm_autoreap = TRUE; +static bool autoreap = true; // Flag setting and clearing for crm_node_t:flags @@ -82,46 +86,80 @@ static gboolean crm_autoreap = TRUE; } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); +static crm_node_t *find_cib_cluster_node(const char *id, const char *uname); -int -crm_remote_peer_cache_size(void) +/*! + * \internal + * \brief Get the number of Pacemaker Remote nodes that have been seen + * + * \return Number of cached Pacemaker Remote nodes + */ +unsigned int +pcmk__cluster_num_remote_nodes(void) { if (crm_remote_peer_cache == NULL) { - return 0; + return 0U; } return g_hash_table_size(crm_remote_peer_cache); } /*! - * \brief Get a remote node peer cache entry, creating it if necessary + * \internal + * \brief Get a remote node cache entry, creating it if necessary * * \param[in] node_name Name of remote node * - * \return Cache entry for node on success, NULL (and set errno) otherwise + * \return Cache entry for node on success, or \c NULL (and set \c errno) + * otherwise * - * \note When creating a new entry, this will leave the node state undetermined, - * so the caller should also call pcmk__update_peer_state() if the state + * \note When creating a new entry, this will leave the node state undetermined. + * The caller should also call \c pcmk__update_peer_state() if the state * is known. 
+ * \note Because this can add and remove cache entries, callers should not + * assume any previously obtained cache entry pointers remain valid. */ crm_node_t * -crm_remote_peer_get(const char *node_name) +pcmk__cluster_lookup_remote_node(const char *node_name) { crm_node_t *node; + char *node_name_copy = NULL; if (node_name == NULL) { - errno = -EINVAL; + errno = EINVAL; return NULL; } + /* It's theoretically possible that the node was added to the cluster peer + * cache before it was known to be a Pacemaker Remote node. Remove that + * entry unless it has a node ID, which means the name actually is + * associated with a cluster node. (@TODO return an error in that case?) + */ + node = pcmk__search_node_caches(0, node_name, + pcmk__node_search_cluster_member); + if ((node != NULL) && (node->uuid == NULL)) { + /* node_name could be a pointer into the cache entry being removed, so + * reassign it to a copy before the original gets freed + */ + node_name_copy = strdup(node_name); + if (node_name_copy == NULL) { + errno = ENOMEM; + return NULL; + } + node_name = node_name_copy; + pcmk__cluster_forget_cluster_node(0, node_name); + } + /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { + free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { + free(node_name_copy); return NULL; } @@ -130,7 +168,8 @@ crm_remote_peer_get(const char *node_name) node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); - errno = -ENOMEM; + errno = ENOMEM; + free(node_name_copy); return NULL; } @@ -140,14 +179,28 @@ crm_remote_peer_get(const char *node_name) /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); + free(node_name_copy); return node; } +/*! 
+ * \internal + * \brief Remove a node from the Pacemaker Remote node cache + * + * \param[in] node_name Name of node to remove from cache + * + * \note The caller must be careful not to use \p node_name after calling this + * function if it might be a pointer into the cache entry being removed. + */ void -crm_remote_peer_cache_remove(const char *node_name) +pcmk__cluster_forget_remote_node(const char *node_name) { - if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { - crm_trace("removed %s from remote peer cache", node_name); + /* Do a lookup first, because node_name could be a pointer within the entry + * being removed -- we can't log it *after* removing it. + */ + if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { + crm_trace("Removing %s from Pacemaker Remote node cache", node_name); + g_hash_table_remove(crm_remote_peer_cache, node_name); } } @@ -157,8 +210,8 @@ crm_remote_peer_cache_remove(const char *node_name) * * \param[in] node_state XML of node state * - * \return CRM_NODE_LOST if PCMK__XA_IN_CCM is false in node_state, - * CRM_NODE_MEMBER otherwise + * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in + * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ @@ -208,7 +261,7 @@ remote_cache_refresh_helper(xmlNode *result, void *user_data) if (node == NULL) { /* Node is not in cache, so add a new entry for it */ - node = crm_remote_peer_get(remote); + node = pcmk__cluster_lookup_remote_node(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); @@ -236,16 +289,17 @@ is_dirty(gpointer key, gpointer value, gpointer user_data) } /*! 
- * \brief Repopulate the remote peer cache based on CIB XML + * \internal + * \brief Repopulate the remote node cache based on CIB XML * - * \param[in] xmlNode CIB XML to parse + * \param[in] cib CIB XML to parse */ -void -crm_remote_peer_cache_refresh(xmlNode *cib) +static void +refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; - crm_peer_init(); + pcmk__cluster_init_node_caches(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. @@ -254,7 +308,7 @@ crm_remote_peer_cache_refresh(xmlNode *cib) g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ - data.field = "id"; + data.field = PCMK_XA_ID; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); @@ -265,11 +319,11 @@ crm_remote_peer_cache_refresh(xmlNode *cib) * peer status callback isn't called until we're sure the node started * successfully. */ - data.field = "value"; + data.field = PCMK_XA_VALUE; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); - data.field = "id"; + data.field = PCMK_XA_ID; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); @@ -278,105 +332,183 @@ crm_remote_peer_cache_refresh(xmlNode *cib) g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } -gboolean -crm_is_peer_active(const crm_node_t * node) +/*! + * \internal + * \brief Check whether a node is an active cluster node + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. 
+ * + * \param[in] node Node to check + * + * \return \c true if the node is an active cluster node, or \c false otherwise + */ +bool +pcmk__cluster_is_node_active(const crm_node_t *node) { - if(node == NULL) { - return FALSE; - } + const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); - if (pcmk_is_set(node->flags, crm_remote_node)) { - /* remote nodes are never considered active members. This - * guarantees they will never be considered for DC membership.*/ - return FALSE; + if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) { + return false; } + + switch (cluster_layer) { + case pcmk_cluster_layer_corosync: #if SUPPORT_COROSYNC - if (is_corosync_cluster()) { - return crm_is_corosync_peer_active(node); + return pcmk__corosync_is_peer_active(node); +#else + break; +#endif // SUPPORT_COROSYNC + default: + break; } -#endif - crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); - return FALSE; + + crm_err("Unhandled cluster layer: %s", + pcmk_cluster_layer_text(cluster_layer)); + return false; } +/*! + * \internal + * \brief Check if a node's entry should be removed from the cluster node cache + * + * A node should be removed from the cache if it's inactive and matches another + * \c crm_node_t (the search object). The node is considered a mismatch if any + * of the following are true: + * * The search object is \c NULL. + * * The search object has an ID set and the cached node's ID does not match it. + * * The search object does not have an ID set, and the cached node's name does + * not match the search node's name. (If both names are \c NULL, it's a + * match.) + * + * Otherwise, the node is considered a match. + * + * Note that if the search object has both an ID and a name set, the name is + * ignored for matching purposes. 
+ * + * \param[in] key Ignored + * \param[in] value \c crm_node_t object from cluster node cache + * \param[in] user_data \c crm_node_t object to match against (search object) + * + * \return \c TRUE if the node entry should be removed from \c crm_peer_cache, + * or \c FALSE otherwise + */ static gboolean -crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) +should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; - - } else if (search->id && node->id != search->id) { + } + if ((search->id != 0) && (node->id != search->id)) { return FALSE; - - } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + } + if ((search->id == 0) + && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + // @TODO Consider name even if ID is set? + return FALSE; + } + if (pcmk__cluster_is_node_active(value)) { return FALSE; - - } else if (crm_is_peer_active(value) == FALSE) { - crm_info("Removing node with name %s and id %u from membership cache", - (node->uname? node->uname : "unknown"), node->id); - return TRUE; } - return FALSE; + + crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership " + "cache", + pcmk__s(node->uname, "(unknown)"), node->id); + return TRUE; } /*! - * \brief Remove all peer cache entries matching a node ID and/or uname + * \internal + * \brief Remove one or more inactive nodes from the cluster node cache * - * \param[in] id ID of node to remove (or 0 to ignore) - * \param[in] name Uname of node to remove (or NULL to ignore) + * All inactive nodes matching \p id and \p node_name as described in + * \c should_forget_cluster_node documentation are removed from the cache. * - * \return Number of cache entries removed + * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed + * from the cache regardless of ID and name. 
This differs from clearing the + * cache, in that entries for active nodes are preserved. + * + * \param[in] id ID of node to remove from cache (0 to ignore) + * \param[in] node_name Name of node to remove from cache (ignored if \p id is + * nonzero) + * + * \note \p node_name is not modified directly, but it will be freed if it's a + * pointer into a cache entry that is removed. */ -guint -reap_crm_member(uint32_t id, const char *name) +void +pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name) { - int matches = 0; crm_node_t search = { 0, }; + char *criterion = NULL; // For logging + guint matches = 0; if (crm_peer_cache == NULL) { - crm_trace("Membership cache not initialized, ignoring purge request"); - return 0; + crm_trace("Membership cache not initialized, ignoring removal request"); + return; } search.id = id; - pcmk__str_update(&search.uname, name); - matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); - if(matches) { - crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache", - matches, pcmk__plural_s(matches), search.id, - (search.uname? " and/or uname=" : ""), - (search.uname? search.uname : "")); + search.uname = pcmk__str_copy(node_name); // May log after original freed + + if (id > 0) { + criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id); + + } else if (node_name != NULL) { + criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name); + } + + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if (matches > 0) { + if (criterion != NULL) { + crm_notice("Removed %u inactive node%s with %s from the membership " + "cache", + matches, pcmk__plural_s(matches), criterion); + } else { + crm_notice("Removed all (%u) inactive cluster nodes from the " + "membership cache", + matches); + } } else { - crm_info("No peers with id=%u%s%s to purge from the membership cache", - search.id, (search.uname? " and/or uname=" : ""), - (search.uname? 
search.uname : "")); + crm_info("No inactive cluster nodes%s%s to remove from the membership " + "cache", + ((criterion != NULL)? " with " : ""), pcmk__s(criterion, "")); } free(search.uname); - return matches; + free(criterion); } static void count_peer(gpointer key, gpointer value, gpointer user_data) { - guint *count = user_data; + unsigned int *count = user_data; crm_node_t *node = value; - if (crm_is_peer_active(node)) { + if (pcmk__cluster_is_node_active(node)) { *count = *count + 1; } } -guint -crm_active_peers(void) +/*! + * \internal + * \brief Get the number of active cluster nodes that have been seen + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. + * + * \return Number of active nodes in the cluster node cache + */ +unsigned int +pcmk__cluster_num_active_nodes(void) { - guint count = 0; + unsigned int count = 0; - if (crm_peer_cache) { + if (crm_peer_cache != NULL) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; @@ -397,8 +529,12 @@ destroy_crm_node(gpointer data) free(node); } +/*! + * \internal + * \brief Initialize node caches + */ void -crm_peer_init(void) +pcmk__cluster_init_node_caches(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); @@ -408,69 +544,78 @@ crm_peer_init(void) crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } - if (known_node_cache == NULL) { - known_node_cache = pcmk__strikey_table(free, destroy_crm_node); + if (cluster_node_cib_cache == NULL) { + cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node); } } +/*! 
+ * \internal + * \brief Initialize node caches + */ void -crm_peer_destroy(void) +pcmk__cluster_destroy_node_caches(void) { if (crm_peer_cache != NULL) { - crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); + crm_trace("Destroying peer cache with %d members", + g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { - crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); + crm_trace("Destroying remote peer cache with %d members", + pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } - if (known_node_cache != NULL) { - crm_trace("Destroying known node cache with %d members", - g_hash_table_size(known_node_cache)); - g_hash_table_destroy(known_node_cache); - known_node_cache = NULL; + if (cluster_node_cib_cache != NULL) { + crm_trace("Destroying configured cluster node cache with %d members", + g_hash_table_size(cluster_node_cib_cache)); + g_hash_table_destroy(cluster_node_cib_cache); + cluster_node_cib_cache = NULL; } - } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! + * \internal * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * - * \note Previously, client callbacks were responsible for peer cache - * management. This is no longer the case, and client callbacks should do - * only client-specific handling. Callbacks MUST NOT add or remove entries - * in the peer caches. + * \note Client callbacks should do only client-specific handling. Callbacks + * must not add or remove entries in the peer caches. 
*/ void -crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) +pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type, + crm_node_t *, const void *)) { + // @TODO Improve documentation of peer_status_callback peer_status_callback = dispatch; } /*! + * \internal * \brief Tell the library whether to automatically reap lost nodes * - * If TRUE (the default), calling crm_update_peer_proc() will also update the - * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() - * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. + * If \c true (the default), calling \c crm_update_peer_proc() will also update + * the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the + * peer state will reap peers whose state changes to anything other than + * \c CRM_NODE_MEMBER. + * * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * - * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable + * \param[in] enable \c true to enable automatic reaping, \c false to disable */ void -crm_set_autoreap(gboolean autoreap) +pcmk__cluster_set_autoreap(bool enable) { - crm_autoreap = autoreap; + autoreap = enable; } static void @@ -494,82 +639,7 @@ hash_find_by_data(gpointer key, gpointer value, gpointer user_data) /*! 
* \internal - * \brief Search caches for a node (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return Node cache entry if found, otherwise NULL - */ -crm_node_t * -pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) -{ - crm_node_t *node = NULL; - - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - - if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { - node = g_hash_table_lookup(crm_remote_peer_cache, uname); - } - - if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { - node = pcmk__search_cluster_node_cache(id, uname, NULL); - } - return node; -} - -/*! - * \brief Get a node cache entry (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster - * node ID to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return (Possibly newly created) node cache entry - */ -crm_node_t * -pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, - int flags) -{ - crm_node_t *node = NULL; - - CRM_ASSERT(id > 0 || uname != NULL); - - crm_peer_init(); - - if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { - node = g_hash_table_lookup(crm_remote_peer_cache, uname); - } - - if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { - node = pcmk__get_peer(id, uname, uuid); - } - return node; -} - -/*! 
- * \brief Get a node cache entry (cluster or Pacemaker Remote) - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return (Possibly newly created) node cache entry - */ -crm_node_t * -crm_get_peer_full(unsigned int id, const char *uname, int flags) -{ - return pcmk__get_peer_full(id, uname, NULL, flags); -} - -/*! - * \internal - * \brief Search cluster node cache + * \brief Search cluster member node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for @@ -578,9 +648,9 @@ crm_get_peer_full(unsigned int id, const char *uname, int flags) * * \return Cluster node cache entry if found, otherwise NULL */ -crm_node_t * -pcmk__search_cluster_node_cache(unsigned int id, const char *uname, - const char *uuid) +static crm_node_t * +search_cluster_member_cache(unsigned int id, const char *uname, + const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; @@ -589,7 +659,7 @@ pcmk__search_cluster_node_cache(unsigned int id, const char *uname, CRM_ASSERT(id > 0 || uname != NULL); - crm_peer_init(); + pcmk__cluster_init_node_caches(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); @@ -681,6 +751,85 @@ pcmk__search_cluster_node_cache(unsigned int id, const char *uname, return node; } +/*! 
+ * \internal + * \brief Search caches for a node (cluster or Pacemaker Remote) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Node cache entry if found, otherwise NULL + */ +crm_node_t * +pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) +{ + crm_node_t *node = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + pcmk__cluster_init_node_caches(); + + if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + + if ((node == NULL) + && pcmk_is_set(flags, pcmk__node_search_cluster_member)) { + + node = search_cluster_member_cache(id, uname, NULL); + } + + if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) { + char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); + + node = find_cib_cluster_node(id_str, uname); + free(id_str); + } + + return node; +} + +/*! + * \internal + * \brief Purge a node from cache (both cluster and Pacemaker Remote) + * + * \param[in] node_name If not NULL, purge only nodes with this name + * \param[in] node_id If not 0, purge cluster nodes only if they have this ID + * + * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. + * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote + * nodes that match \p node_name will be purged, and cluster nodes that + * match both \p node_name and \p node_id will be purged. + * \note The caller must be careful not to use \p node_name after calling this + * function if it might be a pointer into a cache entry being removed. 
+ */ +void +pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) +{ + char *node_name_copy = NULL; + + if ((node_name == NULL) && (node_id == 0U)) { + return; + } + + // Purge from Pacemaker Remote node cache + if ((node_name != NULL) + && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { + /* node_name could be a pointer into the cache entry being purged, + * so reassign it to a copy before the original gets freed + */ + node_name_copy = pcmk__str_copy(node_name); + node_name = node_name_copy; + + crm_trace("Purging %s from Pacemaker Remote node cache", node_name); + g_hash_table_remove(crm_remote_peer_cache, node_name); + } + + pcmk__cluster_forget_cluster_node(node_id, node_name); + free(node_name_copy); +} + #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) @@ -704,7 +853,7 @@ remove_conflicting_peer(crm_node_t *node) && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { - if (crm_is_peer_active(existing_node)) { + if (pcmk__cluster_is_node_active(existing_node)) { continue; } @@ -721,32 +870,51 @@ remove_conflicting_peer(crm_node_t *node) #endif /*! - * \brief Get a cluster node cache entry + * \internal + * \brief Get a cluster node cache entry, possibly creating one if not found + * + * If \c pcmk__node_search_cluster_member is set in \p flags, the return value + * is guaranteed not to be \c NULL. A new cache entry is created if one does not + * already exist. 
* * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for + * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * -pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) +pcmk__get_node(unsigned int id, const char *uname, const char *uuid, + uint32_t flags) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); - crm_peer_init(); + pcmk__cluster_init_node_caches(); + + // Check the Pacemaker Remote node cache first + if (pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + if (node != NULL) { + return node; + } + } + + if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) { + return NULL; + } - node = pcmk__search_cluster_node_cache(id, uname, uuid); + node = search_cluster_member_cache(id, uname, uuid); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { - uname_lookup = get_node_name(id); + uname_lookup = pcmk__cluster_node_name(id); } if (uname_lookup) { @@ -755,16 +923,14 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) /* try to turn up the node one more time now that we know the uname. 
*/ if (node == NULL) { - node = pcmk__search_cluster_node_cache(id, uname, uuid); + node = search_cluster_member_cache(id, uname, uuid); } } - if (node == NULL) { char *uniqueid = crm_generate_uuid(); - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node); + node = pcmk__assert_alloc(1, sizeof(crm_node_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); @@ -785,7 +951,7 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) if(node->uuid == NULL) { if (uuid == NULL) { - uuid = crm_peer_uuid(node); + uuid = pcmk__cluster_node_uuid(node); } if (uuid) { @@ -802,21 +968,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) } /*! - * \brief Get a cluster node cache entry - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * - * \return (Possibly newly created) cluster node cache entry - */ -/* coverity[-alloc] Memory is referenced in one or both hashtables */ -crm_node_t * -crm_get_peer(unsigned int id, const char *uname) -{ - return pcmk__get_peer(id, uname, NULL); -} - -/*! 
* \internal * \brief Update a node's uname * @@ -856,7 +1007,9 @@ update_peer_uname(crm_node_t *node, const char *uname) } #if SUPPORT_COROSYNC - if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { + if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) + && !pcmk_is_set(node->flags, crm_remote_node)) { + remove_conflicting_peer(node); } #endif @@ -879,24 +1032,6 @@ proc2text(enum crm_proc_flag proc) case crm_proc_none: text = "none"; break; - case crm_proc_based: - text = "pacemaker-based"; - break; - case crm_proc_controld: - text = "pacemaker-controld"; - break; - case crm_proc_schedulerd: - text = "pacemaker-schedulerd"; - break; - case crm_proc_execd: - text = "pacemaker-execd"; - break; - case crm_proc_attrd: - text = "pacemaker-attrd"; - break; - case crm_proc_fenced: - text = "pacemaker-fenced"; - break; case crm_proc_cpg: text = "corosync-cpg"; break; @@ -942,7 +1077,7 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const changed = TRUE; } - } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) { + } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", @@ -989,7 +1124,7 @@ crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const return NULL; } - if (crm_autoreap) { + if (autoreap) { const char *peer_state = NULL; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { @@ -1099,18 +1234,20 @@ update_peer_state_iter(const char *source, crm_node_t *node, const char *state, } free(last); - if (crm_autoreap && !is_member + if (autoreap && !is_member && !pcmk_is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by - * crm_remote_peer_cache_refresh(). + * refresh_remote_nodes(). 
*/ if(iter) { - crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); + crm_notice("Purged 1 peer with " PCMK_XA_ID + "=%u and/or uname=%s from the membership cache", + node->id, node->uname); g_hash_table_iter_remove(iter); } else { - reap_crm_member(node->id, node->uname); + pcmk__cluster_forget_cluster_node(node->id, node->uname); } node = NULL; } @@ -1178,7 +1315,7 @@ pcmk__reap_unseen_nodes(uint64_t membership) } static crm_node_t * -find_known_node(const char *id, const char *uname) +find_cib_cluster_node(const char *id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; @@ -1186,7 +1323,7 @@ find_known_node(const char *id, const char *uname) crm_node_t *by_name = NULL; if (uname) { - g_hash_table_iter_init(&iter, known_node_cache); + g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); @@ -1197,7 +1334,7 @@ find_known_node(const char *id, const char *uname) } if (id) { - g_hash_table_iter_init(&iter, known_node_cache); + g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(strcasecmp(node->uuid, id) == 0) { crm_trace("ID match: %s= %p", id, node); @@ -1256,28 +1393,24 @@ find_known_node(const char *id, const char *uname) } static void -known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) +cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data) { - const char *id = crm_element_value(xml_node, XML_ATTR_ID); - const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME); + const char *id = crm_element_value(xml_node, PCMK_XA_ID); + const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME); crm_node_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); - node = find_known_node(id, uname); + node = 
find_cib_cluster_node(id, uname); if (node == NULL) { char *uniqueid = crm_generate_uuid(); - node = calloc(1, sizeof(crm_node_t)); - CRM_ASSERT(node != NULL); + node = pcmk__assert_alloc(1, sizeof(crm_node_t)); - node->uname = strdup(uname); - CRM_ASSERT(node->uname != NULL); + node->uname = pcmk__str_copy(uname); + node->uuid = pcmk__str_copy(id); - node->uuid = strdup(id); - CRM_ASSERT(node->uuid != NULL); - - g_hash_table_replace(known_node_cache, uniqueid, node); + g_hash_table_replace(cluster_node_cib_cache, uniqueid, node); } else if (pcmk_is_set(node->flags, crm_node_dirty)) { pcmk__str_update(&node->uname, uname); @@ -1289,77 +1422,148 @@ known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) } static void -refresh_known_node_cache(xmlNode *cib) +refresh_cluster_node_cib_cache(xmlNode *cib) { - crm_peer_init(); + pcmk__cluster_init_node_caches(); - g_hash_table_foreach(known_node_cache, mark_dirty, NULL); + g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL); crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, - known_node_cache_refresh_helper, NULL); + cluster_node_cib_cache_refresh_helper, NULL); - /* Remove all old cache entries that weren't seen in the CIB */ - g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); + // Remove all old cache entries that weren't seen in the CIB + g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { - crm_remote_peer_cache_refresh(cib); - refresh_known_node_cache(cib); + refresh_remote_nodes(cib); + refresh_cluster_node_cib_cache(cib); +} + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include <crm/cluster/compat.h> + +int +crm_terminate_member(int nodeid, const char *uname, void *unused) +{ + return stonith_api_kick(nodeid, uname, 120, TRUE); +} + +int +crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) +{ + return 
stonith_api_kick(nodeid, uname, 120, TRUE); } -/*! - * \internal - * \brief Search known node cache - * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Bitmask of enum crm_get_peer_flags - * - * \return Known node cache entry if found, otherwise NULL - */ crm_node_t * -pcmk__search_known_node_cache(unsigned int id, const char *uname, - uint32_t flags) +crm_get_peer(unsigned int id, const char *uname) { - crm_node_t *node = NULL; - char *id_str = NULL; + return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member); +} - CRM_ASSERT(id > 0 || uname != NULL); +crm_node_t * +crm_get_peer_full(unsigned int id, const char *uname, int flags) +{ + return pcmk__get_node(id, uname, NULL, flags); +} - node = pcmk__search_node_caches(id, uname, flags); +int +crm_remote_peer_cache_size(void) +{ + unsigned int count = pcmk__cluster_num_remote_nodes(); - if (node || !(flags & CRM_GET_PEER_CLUSTER)) { - return node; - } + return QB_MIN(count, INT_MAX); +} - if (id > 0) { - id_str = crm_strdup_printf("%u", id); - } +void +crm_remote_peer_cache_refresh(xmlNode *cib) +{ + refresh_remote_nodes(cib); +} - node = find_known_node(id_str, uname); +crm_node_t * +crm_remote_peer_get(const char *node_name) +{ + return pcmk__cluster_lookup_remote_node(node_name); +} - free(id_str); - return node; +void +crm_remote_peer_cache_remove(const char *node_name) +{ + pcmk__cluster_forget_remote_node(node_name); } +gboolean +crm_is_peer_active(const crm_node_t * node) +{ + return pcmk__cluster_is_node_active(node); +} -// Deprecated functions kept only for backward API compatibility -// LCOV_EXCL_START +guint +crm_active_peers(void) +{ + return pcmk__cluster_num_active_nodes(); +} -#include <crm/cluster/compat.h> +guint +reap_crm_member(uint32_t id, const char *name) +{ + int matches = 0; + crm_node_t search = { 0, }; -int -crm_terminate_member(int nodeid, const char *uname, void *unused) + if 
(crm_peer_cache == NULL) { + crm_trace("Membership cache not initialized, ignoring purge request"); + return 0; + } + + search.id = id; + search.uname = pcmk__str_copy(name); + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if(matches) { + crm_notice("Purged %d peer%s with " PCMK_XA_ID + "=%u%s%s from the membership cache", + matches, pcmk__plural_s(matches), search.id, + (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + + } else { + crm_info("No peers with " PCMK_XA_ID + "=%u%s%s to purge from the membership cache", + search.id, (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + } + + free(search.uname); + return matches; +} + +void +crm_peer_init(void) { - return stonith_api_kick(nodeid, uname, 120, TRUE); + pcmk__cluster_init_node_caches(); } -int -crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) +void +crm_peer_destroy(void) { - return stonith_api_kick(nodeid, uname, 120, TRUE); + pcmk__cluster_destroy_node_caches(); +} + +void +crm_set_autoreap(gboolean enable) +{ + pcmk__cluster_set_autoreap(enable); +} + +void +crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) +{ + pcmk__cluster_set_status_callback(dispatch); } // LCOV_EXCL_STOP diff --git a/lib/cluster/tests/Makefile.am b/lib/cluster/tests/Makefile.am new file mode 100644 index 0000000..f4f5658 --- /dev/null +++ b/lib/cluster/tests/Makefile.am @@ -0,0 +1,12 @@ +# +# Copyright 2024 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
+# + +SUBDIRS = \ + cluster \ + cpg diff --git a/lib/cluster/tests/cluster/Makefile.am b/lib/cluster/tests/cluster/Makefile.am new file mode 100644 index 0000000..072a4ee --- /dev/null +++ b/lib/cluster/tests/cluster/Makefile.am @@ -0,0 +1,18 @@ +# +# Copyright 2024 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +include $(top_srcdir)/mk/tap.mk +include $(top_srcdir)/mk/unittest.mk + +LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la + +# Add "_test" to the end of all test program names to simplify .gitignore. +check_PROGRAMS = pcmk_cluster_set_destroy_fn_test + +TESTS = $(check_PROGRAMS) diff --git a/lib/cluster/tests/cluster/pcmk_cluster_set_destroy_fn_test.c b/lib/cluster/tests/cluster/pcmk_cluster_set_destroy_fn_test.c new file mode 100644 index 0000000..f6a7ac2 --- /dev/null +++ b/lib/cluster/tests/cluster/pcmk_cluster_set_destroy_fn_test.c @@ -0,0 +1,79 @@ +/* + * Copyright 2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> // gpointer + +#include <crm/cluster.h> // pcmk_cluster_t, etc. 
+#include <crm/common/unittest_internal.h> + +static void +destroy_fn1(gpointer arg) +{ + return; +} + +static void +destroy_fn2(gpointer arg) +{ + return; +} + +static void +null_cluster(void **state) +{ + assert_int_equal(pcmk_cluster_set_destroy_fn(NULL, NULL), EINVAL); + assert_int_equal(pcmk_cluster_set_destroy_fn(NULL, destroy_fn1), EINVAL); +} + +static void +null_fn(void **state) +{ + pcmk_cluster_t cluster = { + .destroy = NULL, + }; + + assert_int_equal(pcmk_cluster_set_destroy_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.destroy, NULL); + + cluster.destroy = destroy_fn1; + assert_int_equal(pcmk_cluster_set_destroy_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.destroy, NULL); +} + +static void +previous_fn_null(void **state) +{ + pcmk_cluster_t cluster = { + .destroy = NULL, + }; + + assert_int_equal(pcmk_cluster_set_destroy_fn(&cluster, destroy_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.destroy, destroy_fn1); +} + +static void +previous_fn_nonnull(void **state) +{ + pcmk_cluster_t cluster = { + .destroy = destroy_fn2, + }; + + assert_int_equal(pcmk_cluster_set_destroy_fn(&cluster, destroy_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.destroy, destroy_fn1); +} + +PCMK__UNIT_TEST(NULL, NULL, + cmocka_unit_test(null_cluster), + cmocka_unit_test(null_fn), + cmocka_unit_test(previous_fn_null), + cmocka_unit_test(previous_fn_nonnull)) diff --git a/lib/cluster/tests/cpg/Makefile.am b/lib/cluster/tests/cpg/Makefile.am new file mode 100644 index 0000000..625f943 --- /dev/null +++ b/lib/cluster/tests/cpg/Makefile.am @@ -0,0 +1,19 @@ +# +# Copyright 2024 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
+# + +include $(top_srcdir)/mk/tap.mk +include $(top_srcdir)/mk/unittest.mk + +LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la + +# Add "_test" to the end of all test program names to simplify .gitignore. +check_PROGRAMS = pcmk_cpg_set_confchg_fn_test \ + pcmk_cpg_set_deliver_fn_test + +TESTS = $(check_PROGRAMS) diff --git a/lib/cluster/tests/cpg/pcmk_cpg_set_confchg_fn_test.c b/lib/cluster/tests/cpg/pcmk_cpg_set_confchg_fn_test.c new file mode 100644 index 0000000..b9e1b6b --- /dev/null +++ b/lib/cluster/tests/cpg/pcmk_cpg_set_confchg_fn_test.c @@ -0,0 +1,98 @@ +/* + * Copyright 2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdint.h> // uint32_t +#include <sys/types.h> // size_t + +#include <crm/cluster.h> // pcmk_cluster_t, etc. +#include <crm/common/unittest_internal.h> + +#if SUPPORT_COROSYNC +#include <corosync/cpg.h> // cpg_handle_t, struct cpg_name + +static void +confchg_fn1(cpg_handle_t handle, const struct cpg_name *group_name, + const struct cpg_address *member_list, size_t member_list_entries, + const struct cpg_address *left_list, size_t left_list_entries, + const struct cpg_address *joined_list, size_t joined_list_entries) +{ + return; +} + +static void +confchg_fn2(cpg_handle_t handle, const struct cpg_name *group_name, + const struct cpg_address *member_list, size_t member_list_entries, + const struct cpg_address *left_list, size_t left_list_entries, + const struct cpg_address *joined_list, size_t joined_list_entries) +{ + return; +} + +static void +null_cluster(void **state) +{ + assert_int_equal(pcmk_cpg_set_confchg_fn(NULL, NULL), EINVAL); + assert_int_equal(pcmk_cpg_set_confchg_fn(NULL, confchg_fn1), EINVAL); +} + +static void +null_fn(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + 
.cpg_confchg_fn = NULL, + }, + }; + + assert_int_equal(pcmk_cpg_set_confchg_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_confchg_fn, NULL); + + cluster.cpg.cpg_confchg_fn = confchg_fn1; + assert_int_equal(pcmk_cpg_set_confchg_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_confchg_fn, NULL); +} + +static void +previous_fn_null(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + .cpg_confchg_fn = NULL, + }, + }; + + assert_int_equal(pcmk_cpg_set_confchg_fn(&cluster, confchg_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_confchg_fn, confchg_fn1); +} + +static void +previous_fn_nonnull(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + .cpg_confchg_fn = confchg_fn2, + }, + }; + + assert_int_equal(pcmk_cpg_set_confchg_fn(&cluster, confchg_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_confchg_fn, confchg_fn1); +} + +PCMK__UNIT_TEST(NULL, NULL, + cmocka_unit_test(null_cluster), + cmocka_unit_test(null_fn), + cmocka_unit_test(previous_fn_null), + cmocka_unit_test(previous_fn_nonnull)) +#else +PCMK__UNIT_TEST(NULL, NULL) +#endif // SUPPORT_COROSYNC diff --git a/lib/cluster/tests/cpg/pcmk_cpg_set_deliver_fn_test.c b/lib/cluster/tests/cpg/pcmk_cpg_set_deliver_fn_test.c new file mode 100644 index 0000000..f682def --- /dev/null +++ b/lib/cluster/tests/cpg/pcmk_cpg_set_deliver_fn_test.c @@ -0,0 +1,94 @@ +/* + * Copyright 2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdint.h> // uint32_t +#include <sys/types.h> // size_t + +#include <crm/cluster.h> // pcmk_cluster_t, etc. 
+#include <crm/common/unittest_internal.h> + +#if SUPPORT_COROSYNC +#include <corosync/cpg.h> // cpg_handle_t, struct cpg_name + +static void +deliver_fn1(cpg_handle_t handle, const struct cpg_name *group_name, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + return; +} + +static void +deliver_fn2(cpg_handle_t handle, const struct cpg_name *group_name, + uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +{ + return; +} + +static void +null_cluster(void **state) +{ + assert_int_equal(pcmk_cpg_set_deliver_fn(NULL, NULL), EINVAL); + assert_int_equal(pcmk_cpg_set_deliver_fn(NULL, deliver_fn1), EINVAL); +} + +static void +null_fn(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + .cpg_deliver_fn = NULL, + }, + }; + + assert_int_equal(pcmk_cpg_set_deliver_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_deliver_fn, NULL); + + cluster.cpg.cpg_deliver_fn = deliver_fn1; + assert_int_equal(pcmk_cpg_set_deliver_fn(&cluster, NULL), pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_deliver_fn, NULL); +} + +static void +previous_fn_null(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + .cpg_deliver_fn = NULL, + }, + }; + + assert_int_equal(pcmk_cpg_set_deliver_fn(&cluster, deliver_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_deliver_fn, deliver_fn1); +} + +static void +previous_fn_nonnull(void **state) +{ + pcmk_cluster_t cluster = { + .cpg = { + .cpg_deliver_fn = deliver_fn2, + }, + }; + + assert_int_equal(pcmk_cpg_set_deliver_fn(&cluster, deliver_fn1), + pcmk_rc_ok); + assert_ptr_equal(cluster.cpg.cpg_deliver_fn, deliver_fn1); +} + +PCMK__UNIT_TEST(NULL, NULL, + cmocka_unit_test(null_cluster), + cmocka_unit_test(null_fn), + cmocka_unit_test(previous_fn_null), + cmocka_unit_test(previous_fn_nonnull)) +#else +PCMK__UNIT_TEST(NULL, NULL) +#endif // SUPPORT_COROSYNC |