summaryrefslogtreecommitdiffstats
path: root/lib/cluster/cluster.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 13:39:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 13:39:28 +0000
commit924f5ea83e48277e014ebf0d19a27187cb93e2f7 (patch)
tree75920a275bba045f6d108204562c218a9a26ea15 /lib/cluster/cluster.c
parentAdding upstream version 2.1.7. (diff)
downloadpacemaker-924f5ea83e48277e014ebf0d19a27187cb93e2f7.tar.xz
pacemaker-924f5ea83e48277e014ebf0d19a27187cb93e2f7.zip
Adding upstream version 2.1.8~rc1.upstream/2.1.8_rc1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/cluster/cluster.c')
-rw-r--r--lib/cluster/cluster.c528
1 files changed, 333 insertions, 195 deletions
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index f2cd428..d650ca5 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -10,6 +10,8 @@
#include <crm_internal.h>
#include <dlfcn.h>
+#include <inttypes.h> // PRIu32
+#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
@@ -17,128 +19,182 @@
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
+#include <sys/utsname.h> // uname()
+
+#include <glib.h> // gboolean
#include <crm/crm.h>
-#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
+#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include "crmcluster_private.h"
CRM_TRACE_INIT_DATA(cluster);
/*!
- * \brief Get (and set if needed) a node's UUID
+ * \internal
+ * \brief Get the message type equivalent of a string
*
- * \param[in,out] peer Node to check
+ * \param[in] text String of message type
*
- * \return Node UUID of \p peer, or NULL if unknown
+ * \return Message type equivalent of \p text
+ */
+enum crm_ais_msg_types
+pcmk__cluster_parse_msg_type(const char *text)
+{
+ CRM_CHECK(text != NULL, return crm_msg_none);
+
+ text = pcmk__message_name(text);
+
+ if (pcmk__str_eq(text, "ais", pcmk__str_none)) {
+ return crm_msg_ais;
+ }
+ if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) {
+ return crm_msg_cib;
+ }
+ if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
+ return crm_msg_crmd;
+ }
+ if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
+ return crm_msg_te;
+ }
+ if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
+ return crm_msg_pe;
+ }
+ if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) {
+ return crm_msg_lrmd;
+ }
+ if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_none)) {
+ return crm_msg_stonithd;
+ }
+ if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) {
+ return crm_msg_stonith_ng;
+ }
+ if (pcmk__str_eq(text, "attrd", pcmk__str_none)) {
+ return crm_msg_attrd;
+ }
+ return crm_msg_none;
+}
+
+/*!
+ * \internal
+ * \brief Get a node's cluster-layer UUID, setting it if not already set
+ *
+ * \param[in,out] node Node to check
+ *
+ * \return Cluster-layer node UUID of \p node, or \c NULL if unknown
*/
const char *
-crm_peer_uuid(crm_node_t *peer)
+pcmk__cluster_node_uuid(crm_node_t *node)
{
- char *uuid = NULL;
+ const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
- // Check simple cases first, to avoid any calls that might block
- if (peer == NULL) {
+ if (node == NULL) {
return NULL;
}
- if (peer->uuid != NULL) {
- return peer->uuid;
+ if (node->uuid != NULL) {
+ return node->uuid;
}
- switch (get_cluster_type()) {
- case pcmk_cluster_corosync:
+ switch (cluster_layer) {
#if SUPPORT_COROSYNC
- uuid = pcmk__corosync_uuid(peer);
-#endif
- break;
+ case pcmk_cluster_layer_corosync:
+ node->uuid = pcmk__corosync_uuid(node);
+ return node->uuid;
+#endif // SUPPORT_COROSYNC
- case pcmk_cluster_unknown:
- case pcmk_cluster_invalid:
- crm_err("Unsupported cluster type");
- break;
+ default:
+ crm_err("Unsupported cluster layer %s",
+ pcmk_cluster_layer_text(cluster_layer));
+ return NULL;
}
-
- peer->uuid = uuid;
- return peer->uuid;
}
/*!
+ * \internal
* \brief Connect to the cluster layer
*
- * \param[in,out] Initialized cluster object to connect
+ * \param[in,out] cluster Initialized cluster object to connect
*
- * \return TRUE on success, otherwise FALSE
+ * \return Standard Pacemaker return code
*/
-gboolean
-crm_cluster_connect(crm_cluster_t *cluster)
+int
+pcmk_cluster_connect(pcmk_cluster_t *cluster)
{
- enum cluster_type_e type = get_cluster_type();
+ const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
+ const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
- crm_notice("Connecting to %s cluster infrastructure",
- name_for_cluster_type(type));
- switch (type) {
- case pcmk_cluster_corosync:
+ // cts-lab looks for this message
+ crm_notice("Connecting to %s cluster layer", cluster_layer_s);
+
+ switch (cluster_layer) {
#if SUPPORT_COROSYNC
- crm_peer_init();
+ case pcmk_cluster_layer_corosync:
return pcmk__corosync_connect(cluster);
-#else
- break;
#endif // SUPPORT_COROSYNC
+
default:
break;
}
- return FALSE;
+
+ crm_err("Failed to connect to unsupported cluster layer %s",
+ cluster_layer_s);
+ return EPROTONOSUPPORT;
}
/*!
* \brief Disconnect from the cluster layer
*
* \param[in,out] cluster Cluster object to disconnect
+ *
+ * \return Standard Pacemaker return code
*/
-void
-crm_cluster_disconnect(crm_cluster_t *cluster)
+int
+pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
{
- enum cluster_type_e type = get_cluster_type();
+ const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
+ const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
- crm_info("Disconnecting from %s cluster infrastructure",
- name_for_cluster_type(type));
- switch (type) {
- case pcmk_cluster_corosync:
+ crm_info("Disconnecting from %s cluster layer", cluster_layer_s);
+
+ switch (cluster_layer) {
#if SUPPORT_COROSYNC
- crm_peer_destroy();
+ case pcmk_cluster_layer_corosync:
pcmk__corosync_disconnect(cluster);
+ pcmk__cluster_destroy_node_caches();
+ return pcmk_rc_ok;
#endif // SUPPORT_COROSYNC
- break;
+
default:
break;
}
+
+ crm_err("Failed to disconnect from unsupported cluster layer %s",
+ cluster_layer_s);
+ return EPROTONOSUPPORT;
}
/*!
- * \brief Allocate a new \p crm_cluster_t object
+ * \brief Allocate a new \p pcmk_cluster_t object
*
- * \return A newly allocated \p crm_cluster_t object (guaranteed not \p NULL)
+ * \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
* \note The caller is responsible for freeing the return value using
* \p pcmk_cluster_free().
*/
-crm_cluster_t *
+pcmk_cluster_t *
pcmk_cluster_new(void)
{
- crm_cluster_t *cluster = calloc(1, sizeof(crm_cluster_t));
-
- CRM_ASSERT(cluster != NULL);
- return cluster;
+ return (pcmk_cluster_t *) pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));
}
/*!
- * \brief Free a \p crm_cluster_t object and its dynamically allocated members
+ * \brief Free a \p pcmk_cluster_t object and its dynamically allocated members
*
* \param[in,out] cluster Cluster object to free
*/
void
-pcmk_cluster_free(crm_cluster_t *cluster)
+pcmk_cluster_free(pcmk_cluster_t *cluster)
{
if (cluster == NULL) {
return;
@@ -149,257 +205,339 @@ pcmk_cluster_free(crm_cluster_t *cluster)
}
/*!
+ * \brief Set the destroy function for a cluster object
+ *
+ * \param[in,out] cluster Cluster object
+ * \param[in] fn Destroy function to set
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
+{
+ if (cluster == NULL) {
+ return EINVAL;
+ }
+ cluster->destroy = fn;
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
* \brief Send an XML message via the cluster messaging layer
*
* \param[in] node Cluster node to send message to
* \param[in] service Message type to use in message host info
* \param[in] data XML message to send
- * \param[in] ordered Ignored for currently supported messaging layers
*
- * \return TRUE on success, otherwise FALSE
+ * \return \c true on success, or \c false otherwise
*/
-gboolean
-send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
- const xmlNode *data, gboolean ordered)
+bool
+pcmk__cluster_send_message(const crm_node_t *node,
+ enum crm_ais_msg_types service, const xmlNode *data)
{
- switch (get_cluster_type()) {
- case pcmk_cluster_corosync:
+ // @TODO Return standard Pacemaker return code
+ switch (pcmk_get_cluster_layer()) {
#if SUPPORT_COROSYNC
+ case pcmk_cluster_layer_corosync:
return pcmk__cpg_send_xml(data, node, service);
-#endif
- break;
+#endif // SUPPORT_COROSYNC
+
default:
break;
}
- return FALSE;
+ return false;
}
/*!
- * \brief Get the local node's name
+ * \internal
+ * \brief Get the node name corresponding to a cluster-layer node ID
*
- * \return Local node's name
- * \note This will fatally exit if local node name cannot be known.
- */
-const char *
-get_local_node_name(void)
-{
- static char *name = NULL;
-
- if (name == NULL) {
- name = get_node_name(0);
- }
- return name;
-}
-
-/*!
- * \brief Get the node name corresponding to a cluster node ID
+ * Get the node name from the cluster layer if possible. Otherwise, if for the
+ * local node, call \c uname() and get the \c nodename member from the
+ * <tt>struct utsname</tt> object.
*
- * \param[in] nodeid Node ID to check (or 0 for local node)
+ * \param[in] nodeid Node ID to check (or 0 for the local node)
*
* \return Node name corresponding to \p nodeid
- * \note This will fatally exit if \p nodeid is 0 and local node name cannot be
- * known.
+ *
+ * \note This will fatally exit if \c uname() fails to get the local node name
+ * or we run out of memory.
+ * \note The caller is responsible for freeing the return value using \c free().
*/
char *
-get_node_name(uint32_t nodeid)
+pcmk__cluster_node_name(uint32_t nodeid)
{
char *name = NULL;
- enum cluster_type_e stack = get_cluster_type();
+ const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
+ const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
- switch (stack) {
- case pcmk_cluster_corosync:
+ switch (cluster_layer) {
#if SUPPORT_COROSYNC
+ case pcmk_cluster_layer_corosync:
name = pcmk__corosync_name(0, nodeid);
+ if (name != NULL) {
+ return name;
+ }
+ // No name from the cluster layer: fall through to the uname() fallback
+ // below (local node only) instead of returning NULL right away
break;
#endif // SUPPORT_COROSYNC
default:
- crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
+ crm_err("Unsupported cluster layer: %s", cluster_layer_s);
+ break;
}
- if ((name == NULL) && (nodeid == 0)) {
- name = pcmk_hostname();
- if (name == NULL) {
+ if (nodeid == 0) {
+ struct utsname hostinfo;
+
+ crm_notice("Could not get local node name from %s cluster layer, "
+ "defaulting to local hostname",
+ cluster_layer_s);
+
+ if (uname(&hostinfo) < 0) {
// @TODO Maybe let the caller decide what to do
- crm_err("Could not obtain the local %s node name",
- name_for_cluster_type(stack));
+ crm_err("Failed to get the local hostname");
crm_exit(CRM_EX_FATAL);
}
- crm_notice("Defaulting to uname -n for the local %s node name",
- name_for_cluster_type(stack));
+ return pcmk__str_copy(hostinfo.nodename);
}
+ // PRIu32 expands to the conversion letters only, so "%" must be explicit
+ crm_notice("Could not obtain a node name for node with "
+ PCMK_XA_ID "=%" PRIu32,
+ nodeid);
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Get the local node's cluster-layer node name
+ *
+ * If getting the node name from the cluster layer is impossible, call
+ * \c uname() and get the \c nodename member from the <tt>struct utsname</tt>
+ * object.
+ *
+ * \return Local node's name
+ *
+ * \note This will fatally exit if \c uname() fails to get the local node name
+ * or we run out of memory.
+ */
+const char *
+pcmk__cluster_local_node_name(void)
+{
+ // @TODO Refactor to avoid trivially leaking name at exit
+ static char *name = NULL;
+
if (name == NULL) {
- crm_notice("Could not obtain a node name for %s node with id %u",
- name_for_cluster_type(stack), nodeid);
+ name = pcmk__cluster_node_name(0);
}
return name;
}
/*!
- * \brief Get the node name corresponding to a node UUID
+ * \internal
+ * \brief Get the node name corresponding to a node UUID
*
- * \param[in] uuid UUID of desired node
+ * Look for the UUID in both the remote node cache and the cluster member cache.
*
- * \return name of desired node
+ * \param[in] uuid UUID to search for
*
- * \note This relies on the remote peer cache being populated with all
- * remote nodes in the cluster, so callers should maintain that cache.
+ * \return Node name corresponding to \p uuid if found, or \c NULL otherwise
*/
const char *
-crm_peer_uname(const char *uuid)
+pcmk__node_name_from_uuid(const char *uuid)
{
+ /* @TODO There are too many functions in libcrmcluster that look up a node
+ * from the node caches (possibly creating a cache entry if none exists).
+ * There are at least the following:
+ * * pcmk__cluster_lookup_remote_node()
+ * * pcmk__get_node()
+ * * pcmk__node_name_from_uuid()
+ * * pcmk__search_node_caches()
+ *
+ * There's a lot of duplication among them, but they all do slightly
+ * different things. We should try to clean them up and consolidate them to
+ * the extent possible, likely with new helper functions.
+ */
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
- /* remote nodes have the same uname and uuid */
+ // Remote nodes have the same uname and uuid
if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
return uuid;
}
- /* avoid blocking calls where possible */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
- if (node->uname != NULL) {
- return node->uname;
- }
- break;
- }
- }
- node = NULL;
-
- if (is_corosync_cluster()) {
- long long id;
-
- if ((pcmk__scan_ll(uuid, &id, 0LL) != pcmk_rc_ok)
- || (id < 1LL) || (id > UINT32_MAX)) {
- crm_err("Invalid Corosync node ID '%s'", uuid);
- return NULL;
- }
-
- node = pcmk__search_cluster_node_cache((uint32_t) id, NULL, NULL);
- if (node != NULL) {
- crm_info("Setting uuid for node %s[%u] to %s",
- node->uname, node->id, uuid);
- node->uuid = strdup(uuid);
return node->uname;
}
- return NULL;
}
-
return NULL;
}
/*!
- * \brief Get a log-friendly string equivalent of a cluster type
+ * \brief Get a log-friendly string equivalent of a cluster layer
*
- * \param[in] type Cluster type
+ * \param[in] layer Cluster layer
*
- * \return Log-friendly string corresponding to \p type
+ * \return Log-friendly string corresponding to \p layer
*/
const char *
-name_for_cluster_type(enum cluster_type_e type)
+pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
{
- switch (type) {
- case pcmk_cluster_corosync:
+ switch (layer) {
+ case pcmk_cluster_layer_corosync:
return "corosync";
- case pcmk_cluster_unknown:
+ case pcmk_cluster_layer_unknown:
return "unknown";
- case pcmk_cluster_invalid:
+ case pcmk_cluster_layer_invalid:
+ return "invalid";
+ default:
+ crm_err("Invalid cluster layer: %d", layer);
return "invalid";
}
- crm_err("Invalid cluster type: %d", type);
- return "invalid";
}
/*!
- * \brief Get (and validate) the local cluster type
+ * \brief Get and validate the local cluster layer
+ *
+ * If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
+ * option, this will try to detect an active cluster from among the supported
+ * cluster layers.
+ *
+ * \return Local cluster layer
*
- * \return Local cluster type
- * \note This will fatally exit if the local cluster type is invalid.
+ * \note This will fatally exit if the configured cluster layer is invalid.
*/
-enum cluster_type_e
-get_cluster_type(void)
+enum pcmk_cluster_layer
+pcmk_get_cluster_layer(void)
{
- bool detected = false;
+ static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
const char *cluster = NULL;
- static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
- /* Return the previous calculation, if any */
- if (cluster_type != pcmk_cluster_unknown) {
- return cluster_type;
+ // Cluster layer is stable once set
+ if (cluster_layer != pcmk_cluster_layer_unknown) {
+ return cluster_layer;
}
cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);
+ if (cluster != NULL) {
+ crm_info("Verifying configured cluster layer '%s'", cluster);
+ cluster_layer = pcmk_cluster_layer_invalid;
+
#if SUPPORT_COROSYNC
- /* If nothing is defined in the environment, try corosync (if supported) */
- if (cluster == NULL) {
- crm_debug("Testing with Corosync");
- cluster_type = pcmk__corosync_detect();
- if (cluster_type != pcmk_cluster_unknown) {
- detected = true;
- goto done;
+ if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
+ cluster_layer = pcmk_cluster_layer_corosync;
}
- }
-#endif
+#endif // SUPPORT_COROSYNC
- /* Something was defined in the environment, test it against what we support */
- crm_info("Verifying cluster type: '%s'",
- ((cluster == NULL)? "-unspecified-" : cluster));
- if (cluster == NULL) {
+ if (cluster_layer == pcmk_cluster_layer_invalid) {
+ crm_notice("This installation does not support the '%s' cluster "
+ "infrastructure: terminating",
+ cluster);
+ crm_exit(CRM_EX_FATAL);
+ }
+ crm_info("Assuming an active '%s' cluster", cluster);
+ } else {
+ // Nothing configured, so test supported cluster layers
#if SUPPORT_COROSYNC
- } else if (pcmk__str_eq(cluster, "corosync", pcmk__str_casei)) {
- cluster_type = pcmk_cluster_corosync;
-#endif
+ crm_debug("Testing with Corosync");
+ if (pcmk__corosync_is_active()) {
+ cluster_layer = pcmk_cluster_layer_corosync;
+ }
+#endif // SUPPORT_COROSYNC
- } else {
- cluster_type = pcmk_cluster_invalid;
- goto done; /* Keep the compiler happy when no stacks are supported */
+ if (cluster_layer == pcmk_cluster_layer_unknown) {
+ crm_notice("Could not determine the current cluster layer");
+ } else {
+ crm_info("Detected an active '%s' cluster",
+ pcmk_cluster_layer_text(cluster_layer));
+ }
}
- done:
- if (cluster_type == pcmk_cluster_unknown) {
- crm_notice("Could not determine the current cluster type");
+ return cluster_layer;
+}
- } else if (cluster_type == pcmk_cluster_invalid) {
- crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
- cluster);
- crm_exit(CRM_EX_FATAL);
+// Deprecated functions kept only for backward API compatibility
+// LCOV_EXCL_START
- } else {
- crm_info("%s an active '%s' cluster",
- (detected? "Detected" : "Assuming"),
- name_for_cluster_type(cluster_type));
+#include <crm/cluster/compat.h>
+
+void
+set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
+{
+ crm_xml_add(xml, attr, pcmk__cluster_node_uuid(node));
+}
+
+gboolean
+crm_cluster_connect(pcmk_cluster_t *cluster)
+{
+ return pcmk_cluster_connect(cluster) == pcmk_rc_ok;
+}
+
+void
+crm_cluster_disconnect(pcmk_cluster_t *cluster)
+{
+ pcmk_cluster_disconnect(cluster);
+}
+
+const char *
+name_for_cluster_type(enum cluster_type_e type)
+{
+ switch (type) {
+ case pcmk_cluster_corosync:
+ return "corosync";
+ case pcmk_cluster_unknown:
+ return "unknown";
+ case pcmk_cluster_invalid:
+ return "invalid";
}
+ crm_err("Invalid cluster type: %d", type);
+ return "invalid";
+}
- return cluster_type;
+enum cluster_type_e
+get_cluster_type(void)
+{
+ return (enum cluster_type_e) pcmk_get_cluster_layer();
}
-/*!
- * \brief Check whether the local cluster is a Corosync cluster
- *
- * \return TRUE if the local cluster is a Corosync cluster, otherwise FALSE
- */
gboolean
is_corosync_cluster(void)
{
- return get_cluster_type() == pcmk_cluster_corosync;
+ return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync;
}
-// Deprecated functions kept only for backward API compatibility
-// LCOV_EXCL_START
+gboolean
+send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
+ const xmlNode *data, gboolean ordered)
+{
+ return pcmk__cluster_send_message(node, service, data);
+}
-#include <crm/cluster/compat.h>
+const char *
+crm_peer_uuid(crm_node_t *peer)
+{
+ return pcmk__cluster_node_uuid(peer);
+}
-void
-set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
+char *
+get_node_name(uint32_t nodeid)
+{
+ return pcmk__cluster_node_name(nodeid);
+}
+
+const char *
+get_local_node_name(void)
+{
+ return pcmk__cluster_local_node_name();
+}
+
+const char *
+crm_peer_uname(const char *uuid)
{
- crm_xml_add(xml, attr, crm_peer_uuid(node));
+ return pcmk__node_name_from_uuid(uuid);
}
// LCOV_EXCL_STOP