summaryrefslogtreecommitdiffstats
path: root/daemons/controld/controld_schedulerd.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 06:53:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 06:53:20 +0000
commite5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch)
treea6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/controld/controld_schedulerd.c
parentInitial commit. (diff)
downloadpacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz
pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip
Adding upstream version 2.1.6.upstream/2.1.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/controld/controld_schedulerd.c')
-rw-r--r--daemons/controld/controld_schedulerd.c506
1 files changed, 506 insertions, 0 deletions
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
new file mode 100644
index 0000000..912f9a5
--- /dev/null
+++ b/daemons/controld/controld_schedulerd.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <unistd.h> /* pid_t, sleep, ssize_t */
+
+#include <crm/cib.h>
+#include <crm/cluster.h>
+#include <crm/common/xml.h>
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_schedulerd.h>
+
+#include <pacemaker-controld.h>
+
+static void handle_disconnect(void);
+
+static pcmk_ipc_api_t *schedulerd_api = NULL;
+
+/*!
+ * \internal
+ * \brief Close any scheduler connection and free associated memory
+ */
+void
+controld_shutdown_schedulerd_ipc(void)
+{
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+
+ pcmk_free_ipc_api(schedulerd_api);
+ schedulerd_api = NULL;
+}
+
+/*!
+ * \internal
+ * \brief Save CIB query result to file, raising FSA error
+ *
+ * \param[in] msg Ignored
+ * \param[in] call_id Call ID of CIB query
+ * \param[in] rc Return code of CIB query
+ * \param[in,out] output Result of CIB query
+ * \param[in] user_data Unique identifier for filename
+ *
+ * \note This is intended to be called after a scheduler connection fails.
+ */
+static void
+save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
+ void *user_data)
+{
+ const char *id = user_data;
+
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ CRM_CHECK(id != NULL, return);
+
+ if (rc == pcmk_ok) {
+ char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
+
+ if (write_xml_file(output, filename, TRUE) < 0) {
+ crm_err("Could not save Cluster Information Base to %s after scheduler crash",
+ filename);
+ } else {
+ crm_notice("Saved Cluster Information Base to %s after scheduler crash",
+ filename);
+ }
+ free(filename);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Respond to scheduler connection failure
+ */
+static void
+handle_disconnect(void)
+{
+ // If we aren't connected to the scheduler, we can't expect a reply
+ controld_expect_sched_reply(NULL);
+
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
+ int rc = pcmk_ok;
+ char *uuid_str = crm_generate_uuid();
+
+ crm_crit("Connection to the scheduler failed "
+ CRM_XS " uuid=%s", uuid_str);
+
+ /*
+ * The scheduler died...
+ *
+ * Save the current CIB so that we have a chance of
+ * figuring out what killed it.
+ *
+ * Delay raising the I_ERROR until the query below completes or
+ * 5s is up, whichever comes first.
+ *
+ */
+ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
+ NULL, NULL,
+ cib_scope_local);
+ fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
+
+ } else {
+ crm_info("Connection to the scheduler released");
+ }
+
+ controld_clear_fsa_input_flags(R_PE_CONNECTED);
+ controld_trigger_fsa();
+ return;
+}
+
+static void
+handle_reply(pcmk_schedulerd_api_reply_t *reply)
+{
+ const char *msg_ref = NULL;
+
+ if (!AM_I_DC) {
+ return;
+ }
+
+ msg_ref = reply->data.graph.reference;
+
+ if (msg_ref == NULL) {
+ crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
+
+ } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
+ pcmk__str_none)) {
+ ha_msg_input_t fsa_input;
+ xmlNode *crm_data_node;
+
+ controld_stop_sched_timer();
+
+ /* do_te_invoke (which will eventually process the fsa_input we are constructing
+ * here) requires that fsa_input.xml be non-NULL. That will only happen if
+ * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
+ * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
+ * gave us the values we need, we just need to put them into XML.
+ *
+ * The name of the top level element here is irrelevant. Nothing checks it.
+ */
+ fsa_input.msg = create_xml_node(NULL, "dummy-reply");
+ crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref);
+ crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input);
+
+ crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA);
+ add_node_copy(crm_data_node, reply->data.graph.tgraph);
+ register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
+
+ free_xml(fsa_input.msg);
+
+ } else {
+ crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
+ }
+}
+
+static void
+scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
+ crm_exit_t status, void *event_data, void *user_data)
+{
+ pcmk_schedulerd_api_reply_t *reply = event_data;
+
+ switch (event_type) {
+ case pcmk_ipc_event_disconnect:
+ handle_disconnect();
+ break;
+
+ case pcmk_ipc_event_reply:
+ handle_reply(reply);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+new_schedulerd_ipc_connection(void)
+{
+ int rc;
+
+ controld_set_fsa_input_flags(R_PE_REQUIRED);
+
+ if (schedulerd_api == NULL) {
+ rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
+
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+ }
+
+ pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
+
+ rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main);
+ if (rc != pcmk_rc_ok) {
+ crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
+ return false;
+ }
+
+ controld_set_fsa_input_flags(R_PE_CONNECTED);
+ return true;
+}
+
+static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
+ xmlNode *output, void *user_data);
+
+/* A_PE_START, A_PE_STOP, O_PE_RESTART */
+void
+do_pe_control(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ if (pcmk_is_set(action, A_PE_STOP)) {
+ controld_clear_fsa_input_flags(R_PE_REQUIRED);
+ pcmk_disconnect_ipc(schedulerd_api);
+ handle_disconnect();
+ }
+ if (pcmk_is_set(action, A_PE_START)
+ && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+
+ if (cur_state == S_STOPPING) {
+ crm_info("Ignoring request to connect to scheduler while shutting down");
+
+ } else if (!new_schedulerd_ipc_connection()) {
+ crm_warn("Could not connect to scheduler");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
+ }
+ }
+}
+
+static int fsa_pe_query = 0;
+static mainloop_timer_t *controld_sched_timer = NULL;
+
+// @TODO Make this a configurable cluster option if there's demand for it
+#define SCHED_TIMEOUT_MS (120000)
+
+/*!
+ * \internal
+ * \brief Handle a timeout waiting for scheduler reply
+ *
+ * \param[in] user_data Ignored
+ *
+ * \return FALSE (indicating that timer should not be restarted)
+ */
+static gboolean
+controld_sched_timeout(gpointer user_data)
+{
+ if (AM_I_DC) {
+ /* If this node is the DC but can't communicate with the scheduler, just
+ * exit (and likely get fenced) so this node doesn't interfere with any
+ * further DC elections.
+ *
+ * @TODO We could try something less drastic first, like disconnecting
+ * and reconnecting to the scheduler, but something is likely going
+ * seriously wrong, so perhaps it's better to just fail as quickly as
+ * possible.
+ */
+ crmd_exit(CRM_EX_FATAL);
+ }
+ return FALSE;
+}
+
+void
+controld_stop_sched_timer(void)
+{
+ if ((controld_sched_timer != NULL)
+ && (controld_globals.fsa_pe_ref != NULL)) {
+ crm_trace("Stopping timer for scheduler reply %s",
+ controld_globals.fsa_pe_ref);
+ }
+ mainloop_timer_stop(controld_sched_timer);
+}
+
+/*!
+ * \internal
+ * \brief Set the scheduler request currently being waited on
+ *
+ * \param[in] ref Request to expect reply to (or NULL for none)
+ *
+ * \note This function takes ownership of \p ref.
+ */
+void
+controld_expect_sched_reply(char *ref)
+{
+ if (ref) {
+ if (controld_sched_timer == NULL) {
+ controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
+ SCHED_TIMEOUT_MS, FALSE,
+ controld_sched_timeout,
+ NULL);
+ }
+ mainloop_timer_start(controld_sched_timer);
+ } else {
+ controld_stop_sched_timer();
+ }
+ free(controld_globals.fsa_pe_ref);
+ controld_globals.fsa_pe_ref = ref;
+}
+
+/*!
+ * \internal
+ * \brief Free the scheduler reply timer
+ */
+void
+controld_free_sched_timer(void)
+{
+ if (controld_sched_timer != NULL) {
+ mainloop_timer_del(controld_sched_timer);
+ controld_sched_timer = NULL;
+ }
+}
+
+/* A_PE_INVOKE */
+void
+do_pe_invoke(long long action,
+ enum crmd_fsa_cause cause,
+ enum crmd_fsa_state cur_state,
+ enum crmd_fsa_input current_input, fsa_data_t * msg_data)
+{
+ cib_t *cib_conn = controld_globals.cib_conn;
+
+ if (AM_I_DC == FALSE) {
+ crm_err("Not invoking scheduler because not DC: %s",
+ fsa_action2string(action));
+ return;
+ }
+
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
+ if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
+ crm_err("Cannot shut down gracefully without the scheduler");
+ register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
+
+ } else {
+ crm_info("Waiting for the scheduler to connect");
+ crmd_fsa_stall(FALSE);
+ controld_set_fsa_action_flags(A_PE_START);
+ controld_trigger_fsa();
+ }
+ return;
+ }
+
+ if (cur_state != S_POLICY_ENGINE) {
+ crm_notice("Not invoking scheduler because in state %s",
+ fsa_state2string(cur_state));
+ return;
+ }
+ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
+ crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
+
+ /* start the join from scratch */
+ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
+ return;
+ }
+
+ fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local);
+
+ crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
+ fsa_state2string(controld_globals.fsa_state));
+
+ controld_expect_sched_reply(NULL);
+ fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
+}
+
+static void
+force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
+{
+ int max = 0;
+ int lpc = 0;
+ const char *xpath_base = NULL;
+ char *xpath_string = NULL;
+ xmlXPathObjectPtr xpathObj = NULL;
+
+ xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG);
+ if (xpath_base == NULL) {
+ crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)");
+ return;
+ }
+
+ xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
+ xpath_base, XML_CIB_TAG_PROPSET,
+ attr_name);
+ xpathObj = xpath_search(xml, xpath_string);
+ max = numXpathResults(xpathObj);
+ free(xpath_string);
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+ crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
+ crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+
+ if(max == 0) {
+ xmlNode *configuration = NULL;
+ xmlNode *crm_config = NULL;
+ xmlNode *cluster_property_set = NULL;
+
+ crm_trace("Creating %s-%s for %s=%s",
+ CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
+
+ configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
+ NULL);
+ if (configuration == NULL) {
+ configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
+ }
+
+ crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
+ NULL);
+ if (crm_config == NULL) {
+ crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
+ }
+
+ cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
+ NULL, NULL);
+ if (cluster_property_set == NULL) {
+ cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
+ crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
+ }
+
+ xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
+
+ crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
+ crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
+ }
+ freeXpathObject(xpathObj);
+}
+
+static void
+do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ char *ref = NULL;
+ pid_t watchdog = pcmk__locate_sbd();
+
+ if (rc != pcmk_ok) {
+ crm_err("Could not retrieve the Cluster Information Base: %s "
+ CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ return;
+
+ } else if (call_id != fsa_pe_query) {
+ crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
+ return;
+
+ } else if (!AM_I_DC
+ || !pcmk_is_set(controld_globals.fsa_input_register,
+ R_PE_CONNECTED)) {
+ crm_debug("No need to invoke the scheduler anymore");
+ return;
+
+ } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
+ crm_debug("Discarding scheduler request in state: %s",
+ fsa_state2string(controld_globals.fsa_state));
+ return;
+
+ /* this callback counts as 1 */
+ } else if (num_cib_op_callbacks() > 1) {
+ crm_debug("Re-asking for the CIB: %d other peer updates still pending",
+ (num_cib_op_callbacks() - 1));
+ sleep(1);
+ controld_set_fsa_action_flags(A_PE_INVOKE);
+ controld_trigger_fsa();
+ return;
+ }
+
+ CRM_LOG_ASSERT(output != NULL);
+
+ /* Refresh the remote node cache and the known node cache when the
+ * scheduler is invoked */
+ pcmk__refresh_node_caches_from_cib(output);
+
+ crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid);
+ pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM,
+ pcmk_is_set(controld_globals.flags,
+ controld_has_quorum));
+
+ force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
+
+ if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
+ && !crm_have_quorum) {
+ crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
+ }
+
+ rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
+
+ if (rc < 0) {
+ crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
+ } else {
+ CRM_ASSERT(ref != NULL);
+ controld_expect_sched_reply(ref);
+ crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
+ "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref,
+ crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags,
+ controld_has_quorum)));
+ }
+}