diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
commit | e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch) | |
tree | a6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/pacemakerd/pcmkd_corosync.c | |
parent | Initial commit. (diff) | |
download | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip |
Adding upstream version 2.1.6. upstream/2.1.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/pacemakerd/pcmkd_corosync.c')
-rw-r--r-- | daemons/pacemakerd/pcmkd_corosync.c | 371 |
1 files changed, 371 insertions, 0 deletions
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c new file mode 100644 index 0000000..2648756 --- /dev/null +++ b/daemons/pacemakerd/pcmkd_corosync.c @@ -0,0 +1,371 @@ +/* + * Copyright 2010-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include "pacemakerd.h" + +#include <sys/utsname.h> +#include <sys/stat.h> /* for calls to stat() */ +#include <libgen.h> /* For basename() and dirname() */ + +#include <sys/types.h> +#include <pwd.h> /* For getpwname() */ + +#include <corosync/hdb.h> +#include <corosync/cfg.h> +#include <corosync/cpg.h> +#include <corosync/cmap.h> + +#include <crm/cluster/internal.h> +#include <crm/common/ipc.h> /* for crm_ipc_is_authentic_process */ +#include <crm/common/mainloop.h> + +#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */ + +static corosync_cfg_handle_t cfg_handle = 0; +static mainloop_timer_t *reconnect_timer = NULL; + +/* =::=::=::= CFG - Shutdown stuff =::=::=::= */ + +static void +cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) +{ + crm_info("Corosync wants to shut down: %s", + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : + (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? 
"forced" : "optional"); + + /* Never allow corosync to shut down while we're running */ + corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); +} + +static corosync_cfg_callbacks_t cfg_callbacks = { + .corosync_cfg_shutdown_callback = cfg_shutdown_callback, +}; + +static int +pcmk_cfg_dispatch(gpointer user_data) +{ + corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; + cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); + + if (rc != CS_OK) { + return -1; + } + return 0; +} + +static void +close_cfg(void) +{ + if (cfg_handle != 0) { +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a + * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully, + * it's not necessary since we exit immediately after this. + */ +#endif + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + } +} + +static gboolean +cluster_reconnect_cb(gpointer data) +{ + if (cluster_connect_cfg()) { + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + crm_notice("Cluster reconnect succeeded"); + mcp_read_config(); + restart_cluster_subdaemons(); + return G_SOURCE_REMOVE; + } else { + crm_info("Cluster reconnect failed " + "(connection will be reattempted once per second)"); + } + /* + * In theory this will continue forever. In practice the CIB connection from + * attrd will timeout and shut down Pacemaker when it gets bored. 
+ */ + return G_SOURCE_CONTINUE; +} + + +static void +cfg_connection_destroy(gpointer user_data) +{ + crm_warn("Lost connection to cluster layer " + "(connection will be reattempted once per second)"); + corosync_cfg_finalize(cfg_handle); + cfg_handle = 0; + reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL); + mainloop_timer_start(reconnect_timer); +} + +void +cluster_disconnect_cfg(void) +{ + close_cfg(); + if (reconnect_timer != NULL) { + /* The mainloop should be gone by this point, so this isn't necessary, + * but cleaning up memory should make valgrind happier. + */ + mainloop_timer_del(reconnect_timer); + reconnect_timer = NULL; + } +} + +#define cs_repeat(counter, max, code) do { \ + code; \ + if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ + counter++; \ + crm_debug("Retrying Corosync operation after %ds", counter); \ + sleep(counter); \ + } else { \ + break; \ + } \ + } while(counter < max) + +gboolean +cluster_connect_cfg(void) +{ + cs_error_t rc; + int fd = -1, retries = 0, rv; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + uint32_t nodeid; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, + .destroy = cfg_connection_destroy, + }; + + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } + + /* CFG provider run as root (in given user namespace, anyway)? 
*/ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CFG provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + goto bail; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CFG provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); + if (rc != CS_OK) { + crm_crit("Could not get local node ID from Corosync: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + goto bail; + } + crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid); + +#ifdef HAVE_COROSYNC_CFG_TRACKSTART + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0)); + if (rc != CS_OK) { + crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + goto bail; + } +#endif + + mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); + return TRUE; + + bail: + corosync_cfg_finalize(cfg_handle); + return FALSE; +} + +void +pcmkd_shutdown_corosync(void) +{ + cs_error_t rc; + + if (cfg_handle == 0) { + crm_warn("Unable to shut down Corosync: No connection"); + return; + } + crm_info("Asking Corosync to shut down"); + rc = corosync_cfg_try_shutdown(cfg_handle, + COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); + if (rc == CS_OK) { + close_cfg(); + } else { + crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + } +} + +bool +pcmkd_corosync_connected(void) +{ + cpg_handle_t local_handle = 0; + cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0}; + int fd = -1; + + if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) { + return false; + } + + if 
(cpg_fd_get(local_handle, &fd) != CS_OK) { + return false; + } + + cpg_finalize(local_handle); + + return true; +} + +/* =::=::=::= Configuration =::=::=::= */ +static int +get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, + const char *fallback) +{ + int rc = 0, retries = 0; + + cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); + if (rc != CS_OK) { + crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); + pcmk__str_update(value, fallback); + } + crm_trace("%s: %s", key, *value); + return rc; +} + +gboolean +mcp_read_config(void) +{ + cs_error_t rc = CS_OK; + int retries = 0; + cmap_handle_t local_handle; + uint64_t config = 0; + int fd = -1; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; + int rv; + enum cluster_type_e stack; + + // There can be only one possibility + do { + rc = pcmk__init_cmap(&local_handle); + if (rc != CS_OK) { + retries++; + crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) " + CRM_XS " rc=%d", cs_strerror(rc), retries, rc); + sleep(retries); + + } else { + break; + } + + } while (retries < 5); + + if (rc != CS_OK) { + crm_crit("Could not connect to Corosync CMAP: %s " + CRM_XS " rc=%d", cs_strerror(rc), rc); + return FALSE; + } + + rc = cmap_fd_get(local_handle, &fd); + if (rc != CS_OK) { + crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d", + cs_strerror(rc), rc); + cmap_finalize(local_handle); + return FALSE; + } + + /* CMAP provider run as root (in given user namespace, anyway)? 
*/ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { + crm_crit("Rejecting Corosync CMAP provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); + cmap_finalize(local_handle); + return FALSE; + } else if (rv < 0) { + crm_crit("Could not authenticate Corosync CMAP provider: %s " + CRM_XS " rc=%d", strerror(-rv), -rv); + cmap_finalize(local_handle); + return FALSE; + } + + stack = get_cluster_type(); + if (stack != pcmk_cluster_corosync) { + crm_crit("Expected Corosync cluster layer but detected %s " + CRM_XS " stack=%d", name_for_cluster_type(stack), stack); + return FALSE; + } + + crm_info("Reading configuration for %s stack", + name_for_cluster_type(stack)); + pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync"); + pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync"); + + // If debug logging is not configured, check whether corosync has it + if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) { + char *debug_enabled = NULL; + + get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off"); + + if (crm_is_true(debug_enabled)) { + pcmk__set_env_option(PCMK__ENV_DEBUG, "1"); + if (get_crm_log_level() < LOG_DEBUG) { + set_crm_log_level(LOG_DEBUG); + } + + } else { + pcmk__set_env_option(PCMK__ENV_DEBUG, "0"); + } + + free(debug_enabled); + } + + if(local_handle){ + gid_t gid = 0; + if (pcmk_daemon_user(NULL, &gid) < 0) { + crm_warn("Could not authorize group with Corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); + + } else { + char key[PATH_MAX]; + snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); + rc = cmap_set_uint8(local_handle, key, 1); + if (rc != CS_OK) { + crm_warn("Could not authorize group with Corosync: %s " CRM_XS + " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc); + } + } + } + cmap_finalize(local_handle); + + return TRUE; +} |