diff options
Diffstat (limited to 'daemons/controld/controld_cib.c')
-rw-r--r-- | daemons/controld/controld_cib.c | 1138 |
1 files changed, 1138 insertions, 0 deletions
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c new file mode 100644 index 0000000..94b99dd --- /dev/null +++ b/daemons/controld/controld_cib.c @@ -0,0 +1,1138 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <unistd.h> /* sleep */ + +#include <crm/common/alerts_internal.h> +#include <crm/common/xml.h> +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/lrmd_internal.h> + +#include <pacemaker-controld.h> + +// Call ID of the most recent in-progress CIB resource update (or 0 if none) +static int pending_rsc_update = 0; + +// Call IDs of requested CIB replacements that won't trigger a new election +// (used as a set of gint values) +static GHashTable *cib_replacements = NULL; + +/*! + * \internal + * \brief Store the call ID of a CIB replacement that the controller requested + * + * The \p do_cib_replaced() callback function will avoid triggering a new + * election when we're notified of one of these expected replacements. + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \note This function should be called after making any asynchronous CIB + * request (or before making any synchronous CIB request) that may replace + * part of the nodes or status section. This may include CIB sync calls. + */ +void +controld_record_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return); + + if (cib_replacements == NULL) { + cib_replacements = g_hash_table_new(NULL, NULL); + } + + /* If the call ID is already present in the table, then it's old. We may not + * be removing them properly, and we could improperly ignore replacement + * notifications if cib_t:call_id wraps around. + */ + CRM_LOG_ASSERT(g_hash_table_add(cib_replacements, + GINT_TO_POINTER((gint) call_id))); +} + +/*! + * \internal + * \brief Remove the call ID of a CIB replacement from the replacements table + * + * \param[in] call_id CIB call ID (or 0 for a synchronous call) + * + * \return \p true if \p call_id was found in the table, or \p false otherwise + * + * \note CIB notifications run before CIB callbacks. If this function is called + * from within a callback, \p do_cib_replaced() will have removed + * \p call_id from the table first if relevant changes triggered a + * notification. + */ +bool +controld_forget_cib_replace_call(int call_id) +{ + CRM_CHECK(call_id >= 0, return false); + + if (cib_replacements == NULL) { + return false; + } + return g_hash_table_remove(cib_replacements, + GINT_TO_POINTER((gint) call_id)); +} + +/*! + * \internal + * \brief Empty the hash table containing call IDs of CIB replacement requests + */ +void +controld_forget_all_cib_replace_calls(void) +{ + if (cib_replacements != NULL) { + g_hash_table_remove_all(cib_replacements); + } +} + +/*! + * \internal + * \brief Free the hash table containing call IDs of CIB replacement requests + */ +void +controld_destroy_cib_replacements_table(void) +{ + if (cib_replacements != NULL) { + g_hash_table_destroy(cib_replacements); + cib_replacements = NULL; + } +} + +/*! + * \internal + * \brief Respond to a dropped CIB connection + * + * \param[in] user_data CIB connection that dropped + */ +static void +handle_cib_disconnect(gpointer user_data) +{ + CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); + + controld_trigger_fsa(); + controld_globals.cib_conn->state = cib_disconnected; + + if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + // @TODO This should trigger a reconnect, not a shutdown + crm_crit("Lost connection to the CIB manager, shutting down"); + register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + } else { // Expected + crm_info("Connection to the CIB manager terminated"); + } +} + +static void +do_cib_updated(const char *event, xmlNode * msg) +{ + if (pcmk__alert_in_patchset(msg, TRUE)) { + controld_trigger_config(); + } +} + +static void +do_cib_replaced(const char *event, xmlNode * msg) +{ + int call_id = 0; + const char *client_id = crm_element_value(msg, F_CIB_CLIENTID); + uint32_t change_section = cib_change_section_nodes + |cib_change_section_status; + long long value = 0; + + crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC)); + if (!AM_I_DC) { + return; + } + + if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0) + && pcmk__str_eq(client_id, controld_globals.cib_client_id, + pcmk__str_none) + && controld_forget_cib_replace_call(call_id)) { + // We requested this replace op. No need to restart the join. + return; + } + + if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0) + || (value < 0) || (value > UINT32_MAX)) { + + crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION); + } else { + change_section = (uint32_t) value; + } + + if (pcmk_any_flags_set(change_section, cib_change_section_nodes + |cib_change_section_status)) { + + /* start the join process again so we get everyone's LRM status */ + populate_cib_nodes(node_update_quick|node_update_all, __func__); + + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); + } +} + +void +controld_disconnect_cib_manager(void) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + CRM_ASSERT(cib_conn != NULL); + + crm_info("Disconnecting from the CIB manager"); + + controld_clear_fsa_input_flags(R_CIB_CONNECTED); + + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, + do_cib_replaced); + cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, + do_cib_updated); + cib_free_callbacks(cib_conn); + + if (cib_conn->state != cib_disconnected) { + cib_conn->cmds->set_secondary(cib_conn, + cib_scope_local|cib_discard_reply); + cib_conn->cmds->signoff(cib_conn); + } + + crm_notice("Disconnected from the CIB manager"); +} + +/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ +void +do_cib_control(long long action, + enum crmd_fsa_cause cause, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) +{ + static int cib_retries = 0; + + cib_t *cib_conn = controld_globals.cib_conn; + + void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; + void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced; + void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; + + int rc = pcmk_ok; + + CRM_ASSERT(cib_conn != NULL); + + if (pcmk_is_set(action, A_CIB_STOP)) { + if ((cib_conn->state != cib_disconnected) + && (pending_rsc_update != 0)) { + + crm_info("Waiting for resource update %d to complete", + pending_rsc_update); + crmd_fsa_stall(FALSE); + return; + } + controld_disconnect_cib_manager(); + } + + if (!pcmk_is_set(action, A_CIB_START)) { + return; + } + + if (cur_state == S_STOPPING) { + crm_err("Ignoring request to connect to the CIB manager after " + "shutdown"); + return; + } + + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + + if (rc != pcmk_ok) { + // A short wait that usually avoids stalling the FSA + sleep(1); + rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, + cib_command_nonblocking); + } + + if (rc != pcmk_ok) { + crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); + + } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, + dnotify_fn) != pcmk_ok) { + crm_err("Could not set dnotify callback"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_REPLACE_NOTIFY, + replace_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (replace)"); + + } else if (cib_conn->cmds->add_notify_callback(cib_conn, + T_CIB_DIFF_NOTIFY, + update_cb) != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + + } else { + controld_set_fsa_input_flags(R_CIB_CONNECTED); + cib_retries = 0; + cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id, + NULL); + } + + if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { + cib_retries++; + + if (cib_retries < 30) { + crm_warn("Couldn't complete CIB registration %d times... " + "pause and retry", cib_retries); + controld_start_wait_timer(); + crmd_fsa_stall(FALSE); + + } else { + crm_err("Could not complete CIB registration %d times... " + "hard error", cib_retries); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } + } +} + +#define MIN_CIB_OP_TIMEOUT (30) + +/*! + * \internal + * \brief Get the timeout (in seconds) that should be used with CIB operations + * + * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout + * environment variable, or 10 seconds times one more than the number of + * nodes in the cluster. + */ +unsigned int +cib_op_timeout(void) +{ + static int env_timeout = -1; + unsigned int calculated_timeout = 0; + + if (env_timeout == -1) { + const char *env = getenv("PCMK_cib_timeout"); + + pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); + crm_trace("Minimum CIB op timeout: %ds (environment: %s)", + env_timeout, (env? env : "none")); + } + + calculated_timeout = 1 + crm_active_peers(); + if (crm_remote_peer_cache) { + calculated_timeout += g_hash_table_size(crm_remote_peer_cache); + } + calculated_timeout *= 10; + + calculated_timeout = QB_MAX(calculated_timeout, env_timeout); + crm_trace("Calculated timeout: %us", calculated_timeout); + + if (controld_globals.cib_conn) { + controld_globals.cib_conn->call_timeout = calculated_timeout; + } + return calculated_timeout; +} + +/*! + * \internal + * \brief Get CIB call options to use local scope if primary is unavailable + * + * \return CIB call options + */ +int +crmd_cib_smart_opt(void) +{ + int call_opt = cib_none; + + if ((controld_globals.fsa_state == S_ELECTION) + || (controld_globals.fsa_state == S_PENDING)) { + crm_info("Sending update to local CIB in state: %s", + fsa_state2string(controld_globals.fsa_state)); + cib__set_call_options(call_opt, "update", cib_scope_local); + } + return call_opt; +} + +static void +cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) +{ + char *desc = user_data; + + if (rc == 0) { + crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); + } else { + crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", + desc, call_id, pcmk_strerror(rc), rc); + } +} + +// Searches for various portions of node_state to delete + +// Match a particular node's node_state (takes node name 1x) +#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" + +// Node's lrm section (name 1x) +#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM + +/* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x) + */ +#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ + "|" XPATH_NODE_STATE \ + "//" XML_LRM_TAG_RESOURCE \ + "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \ + "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]" + +// Node's transient_attributes section (name 1x) +#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS + +// Everything under node_state (name 1x) +#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" + +/* Unlocked history + transient attributes + * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x, + * name 1x) + */ +#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS + +/*! + * \internal + * \brief Delete subsection of a node's CIB node_state + * + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to delete + * \param[in] options CIB call options to use + */ +void +controld_delete_node_state(const char *uname, enum controld_section_e section, + int options) +{ + cib_t *cib_conn = controld_globals.cib_conn; + + char *xpath = NULL; + char *desc = NULL; + + // Shutdown locks that started before this time are expired + long long expire = (long long) time(NULL) + - controld_globals.shutdown_lock_limit; + + CRM_CHECK(uname != NULL, return); + switch (section) { + case controld_section_lrm: + xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc = crm_strdup_printf("resource history for node %s", uname); + break; + case controld_section_lrm_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, + uname, uname, expire); + desc = crm_strdup_printf("resource history (other than shutdown " + "locks) for node %s", uname); + break; + case controld_section_attrs: + xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc = crm_strdup_printf("transient attributes for node %s", uname); + break; + case controld_section_all: + xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc = crm_strdup_printf("all state for node %s", uname); + break; + case controld_section_all_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, + uname, uname, expire, uname); + desc = crm_strdup_printf("all state (other than shutdown locks) " + "for node %s", uname); + break; + } + + if (cib_conn == NULL) { + crm_warn("Unable to delete %s: no CIB connection", desc); + free(desc); + } else { + int call_id; + + cib__set_call_options(options, "node state deletion", + cib_xpath|cib_multiple); + call_id = cib_conn->cmds->remove(cib_conn, xpath, NULL, options); + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, call_id, xpath); + fsa_register_cib_callback(call_id, desc, cib_delete_callback); + // CIB library handles freeing desc + } + free(xpath); +} + +// Takes node name and resource ID +#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ + "[@" XML_ATTR_UNAME "='%s']/" \ + XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE \ + "[@" XML_ATTR_ID "='%s']" +// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks + +/*! + * \internal + * \brief Clear resource history from CIB for a given resource and node + * + * \param[in] rsc_id ID of resource to be cleared + * \param[in] node Node whose resource history should be cleared + * \param[in] user_name ACL user name to use + * \param[in] call_options CIB call options + * + * \return Standard Pacemaker return code + */ +int +controld_delete_resource_history(const char *rsc_id, const char *node, + const char *user_name, int call_options) +{ + char *desc = NULL; + char *xpath = NULL; + int rc = pcmk_rc_ok; + + CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); + + desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); + if (controld_globals.cib_conn == NULL) { + crm_err("Unable to clear %s: no CIB connection", desc); + free(desc); + return ENOTCONN; + } + + // Ask CIB to delete the entry + xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); + rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE, + NULL, xpath, NULL, NULL, call_options|cib_xpath, + user_name); + + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + crm_err("Could not delete resource status of %s on %s%s%s: %s " + CRM_XS " rc=%d", rsc_id, node, + (user_name? " for user " : ""), (user_name? user_name : ""), + pcmk_rc_str(rc), rc); + free(desc); + free(xpath); + return rc; + } + + if (pcmk_is_set(call_options, cib_sync_call)) { + if (pcmk_is_set(call_options, cib_dryrun)) { + crm_debug("Deletion of %s would succeed", desc); + } else { + crm_debug("Deletion of %s succeeded", desc); + } + free(desc); + + } else { + crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, rc, xpath); + fsa_register_cib_callback(rc, desc, cib_delete_callback); + // CIB library handles freeing desc + } + + free(xpath); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Build XML and string of parameters meeting some criteria, for digest + * + * \param[in] op Executor event with parameter table to use + * \param[in] metadata Parsed meta-data for executed resource agent + * \param[in] param_type Flag used for selection criteria + * \param[out] result Will be set to newly created XML with selected + * parameters as attributes + * + * \return Newly allocated space-separated string of parameter names + * \note Selection criteria varies by param_type: for the restart digest, we + * want parameters that are *not* marked reloadable (OCF 1.1) or that + * *are* marked unique (pre-1.1), for both string and XML results; for the + * secure digest, we want parameters that *are* marked private for the + * string, but parameters that are *not* marked private for the XML. + * \note It is the caller's responsibility to free the string return value with + * \p g_string_free() and the XML result with \p free_xml(). + */ +static GString * +build_parameter_list(const lrmd_event_data_t *op, + const struct ra_metadata_s *metadata, + enum ra_param_flags_e param_type, xmlNode **result) +{ + GString *list = NULL; + + *result = create_xml_node(NULL, XML_TAG_PARAMS); + + /* Consider all parameters only except private ones to be consistent with + * what scheduler does with calculate_secure_digest(). + */ + if (param_type == ra_param_private + && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { + g_hash_table_foreach(op->params, hash2field, *result); + pcmk__filter_op_for_digest(*result); + } + + for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { + struct ra_param_s *param = (struct ra_param_s *) iter->data; + + bool accept_for_list = false; + bool accept_for_xml = false; + + switch (param_type) { + case ra_param_reloadable: + accept_for_list = !pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = accept_for_list; + break; + + case ra_param_unique: + accept_for_list = pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = accept_for_list; + break; + + case ra_param_private: + accept_for_list = pcmk_is_set(param->rap_flags, param_type); + accept_for_xml = !accept_for_list; + break; + } + + if (accept_for_list) { + crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); + + if (list == NULL) { + // We will later search for " WORD ", so start list with a space + pcmk__add_word(&list, 256, " "); + } + pcmk__add_word(&list, 0, param->rap_name); + + } else { + crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); + } + + if (accept_for_xml) { + const char *v = g_hash_table_lookup(op->params, param->rap_name); + + if (v != NULL) { + crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); + crm_xml_add(*result, param->rap_name, v); + } + + } else { + crm_trace("Removing attr %s from the xml result", param->rap_name); + xml_remove_prop(*result, param->rap_name); + } + } + + if (list != NULL) { + // We will later search for " WORD ", so end list with a space + pcmk__add_word(&list, 0, " "); + } + return list; +} + +static void +append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, + xmlNode *update, const char *version) +{ + GString *list = NULL; + char *digest = NULL; + xmlNode *restart = NULL; + + CRM_LOG_ASSERT(op->params != NULL); + + if (op->interval_ms > 0) { + /* monitors are not reloadable */ + return; + } + + if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { + // Add parameters not marked reloadable to the "op-force-restart" list + list = build_parameter_list(op, metadata, ra_param_reloadable, + &restart); + + } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { + /* @COMPAT pre-OCF-1.1 resource agents + * + * Before OCF 1.1, Pacemaker abused "unique=0" to indicate + * reloadability. Add any parameters with unique="1" to the + * "op-force-restart" list. + */ + list = build_parameter_list(op, metadata, ra_param_unique, &restart); + + } else { + // Resource does not support agent reloads + return; + } + + digest = calculate_operation_digest(restart, version); + /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, + * no matter if it actually supports any parameters with unique="1"). */ + crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, + (list == NULL)? "" : (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); + + if ((list != NULL) && (list->len > 0)) { + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + } else { + crm_trace("%s: %s", op->rsc_id, digest); + } + + if (list != NULL) { + g_string_free(list, TRUE); + } + free_xml(restart); + free(digest); +} + +static void +append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, + xmlNode *update, const char *version) +{ + GString *list = NULL; + char *digest = NULL; + xmlNode *secure = NULL; + + CRM_LOG_ASSERT(op->params != NULL); + + /* + * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the + * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on + * the insecure ones + */ + list = build_parameter_list(op, metadata, ra_param_private, &secure); + + if (list != NULL) { + digest = calculate_operation_digest(secure, version); + crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str); + crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); + + crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); + g_string_free(list, TRUE); + } else { + crm_trace("%s: no secure parameters", op->rsc_id); + } + + free_xml(secure); + free(digest); +} + +/*! + * \internal + * \brief Create XML for a resource history entry + * + * \param[in] func Function name of caller + * \param[in,out] parent XML to add entry to + * \param[in] rsc Affected resource + * \param[in,out] op Action to add an entry for (or NULL to do nothing) + * \param[in] node_name Node where action occurred + */ +void +controld_add_resource_history_xml_as(const char *func, xmlNode *parent, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, + const char *node_name) +{ + int target_rc = 0; + xmlNode *xml_op = NULL; + struct ra_metadata_s *metadata = NULL; + const char *caller_version = NULL; + lrm_state_t *lrm_state = NULL; + + if (op == NULL) { + return; + } + + target_rc = rsc_op_expected_rc(op); + + caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); + CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); + + xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, + controld_globals.our_nodename, func); + if (xml_op == NULL) { + return; + } + + if ((rsc == NULL) || (op->params == NULL) + || !crm_op_needs_metadata(rsc->standard, op->op_type)) { + + crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", + op->op_type, op->rsc_id, op->params, rsc); + return; + } + + lrm_state = lrm_state_find(node_name); + if (lrm_state == NULL) { + crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT + " because we have no connection to executor for %s", + op->rsc_id, op->op_type, op->interval_ms, node_name); + return; + } + + /* Ideally the metadata is cached, and the agent is just a fallback. + * + * @TODO Go through all callers and ensure they get metadata asynchronously + * first. + */ + metadata = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_agent + |controld_metadata_from_cache); + if (metadata == NULL) { + return; + } + + crm_trace("Including additional digests for %s:%s:%s", + rsc->standard, rsc->provider, rsc->type); + append_restart_list(op, metadata, xml_op, caller_version); + append_secure_list(op, metadata, xml_op, caller_version); + + return; +} + +/*! + * \internal + * \brief Record an action as pending in the CIB, if appropriate + * + * \param[in] node_name Node where the action is pending + * \param[in] rsc Resource that action is for + * \param[in,out] op Pending action + * + * \return true if action was recorded in CIB, otherwise false + */ +bool +controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op) +{ + const char *record_pending = NULL; + + CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), + return false); + + // Never record certain operation types as pending + if ((op->op_type == NULL) || (op->params == NULL) + || !controld_action_is_recordable(op->op_type)) { + return false; + } + + // Check action's record-pending meta-attribute (defaults to true) + record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); + if ((record_pending != NULL) && !crm_is_true(record_pending)) { + return false; + } + + op->call_id = -1; + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + + lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); + + crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", + pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, + node_name); + controld_update_resource_history(node_name, rsc, op, 0); + return true; +} + +static void +cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + switch (rc) { + case pcmk_ok: + case -pcmk_err_diff_failed: + case -pcmk_err_diff_resync: + crm_trace("Resource update %d complete: rc=%d", call_id, rc); + break; + default: + crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); + } + + if (call_id == pending_rsc_update) { + pending_rsc_update = 0; + controld_trigger_fsa(); + } +} + +/* Only successful stops, and probes that found the resource inactive, get locks + * recorded in the history. This ensures the resource stays locked to the node + * until it is active there again after the node comes back up. + */ +static bool +should_preserve_lock(lrmd_event_data_t *op) +{ + if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { + return false; + } + if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { + return true; + } + if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { + return true; + } + return false; +} + +/*! + * \internal + * \brief Request a CIB update + * + * \param[in] section Section of CIB to update + * \param[in,out] data New XML of CIB section to update + * \param[in] options CIB call options + * \param[in] callback If not NULL, set this as the operation callback + * + * \return Standard Pacemaker return code + * + * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is + * stored in \p pending_rsc_update on success. + */ +int +controld_update_cib(const char *section, xmlNode *data, int options, + void (*callback)(xmlNode *, int, int, xmlNode *, void *)) +{ + int cib_rc = -ENOTCONN; + + CRM_ASSERT(data != NULL); + + if (controld_globals.cib_conn != NULL) { + cib_rc = cib_internal_op(controld_globals.cib_conn, + PCMK__CIB_REQUEST_MODIFY, NULL, section, + data, NULL, options, NULL); + if (cib_rc >= 0) { + crm_debug("Submitted CIB update %d for %s section", + cib_rc, section); + } + } + + if (callback == NULL) { + if (cib_rc < 0) { + crm_err("Failed to update CIB %s section: %s", + section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); + } + + } else { + if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { + /* Checking for a particular callback is a little hacky, but it + * didn't seem worth adding an output argument for cib_rc for just + * one use case. + */ + pending_rsc_update = cib_rc; + } + fsa_register_cib_callback(cib_rc, NULL, callback); + } + + return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); +} + +/*! + * \internal + * \brief Update resource history entry in CIB + * + * \param[in] node_name Node where action occurred + * \param[in] rsc Resource that action is for + * \param[in,out] op Action to record + * \param[in] lock_time If nonzero, when resource was locked to node + * + * \note On success, the CIB update's call ID will be stored in + * pending_rsc_update. + */ +void +controld_update_resource_history(const char *node_name, + const lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, time_t lock_time) +{ + xmlNode *update = NULL; + xmlNode *xml = NULL; + int call_opt = crmd_cib_smart_opt(); + const char *node_id = NULL; + const char *container = NULL; + + CRM_CHECK((node_name != NULL) && (op != NULL), return); + + if (rsc == NULL) { + crm_warn("Resource %s no longer exists in the executor", op->rsc_id); + controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); + return; + } + + // <status> + update = create_xml_node(NULL, XML_CIB_TAG_STATUS); + + // <node_state ...> + xml = create_xml_node(update, XML_CIB_TAG_STATE); + if (pcmk__str_eq(node_name, controld_globals.our_nodename, + pcmk__str_casei)) { + node_id = controld_globals.our_uuid; + } else { + node_id = node_name; + pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true); + } + crm_xml_add(xml, XML_ATTR_ID, node_id); + crm_xml_add(xml, XML_ATTR_UNAME, node_name); + crm_xml_add(xml, XML_ATTR_ORIGIN, __func__); + + // <lrm ...> + xml = create_xml_node(xml, XML_CIB_TAG_LRM); + crm_xml_add(xml, XML_ATTR_ID, node_id); + + // <lrm_resources> + xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES); + + // <lrm_resource ...> + xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE); + crm_xml_add(xml, XML_ATTR_ID, op->rsc_id); + crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard); + crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider); + crm_xml_add(xml, XML_ATTR_TYPE, rsc->type); + if (lock_time != 0) { + /* Actions on a locked resource should either preserve the lock by + * recording it with the action result, or clear it. + */ + if (!should_preserve_lock(op)) { + lock_time = 0; + } + crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + (long long) lock_time); + } + if (op->params != NULL) { + container = g_hash_table_lookup(op->params, + CRM_META "_" XML_RSC_ATTR_CONTAINER); + if (container != NULL) { + crm_trace("Resource %s is a part of container resource %s", + op->rsc_id, container); + crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container); + } + } + + // <lrm_resource_op ...> (possibly more than one) + controld_add_resource_history_xml(xml, rsc, op, node_name); + + /* Update CIB asynchronously. Even if it fails, the resource state should be + * discovered during the next election. Worst case, the node is wrongly + * fenced for running a resource it isn't. + */ + crm_log_xml_trace(update, __func__); + controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback); + free_xml(update); +} + +/*! + * \internal + * \brief Erase an LRM history entry from the CIB, given the operation data + * + * \param[in] op Operation whose history should be deleted + */ +void +controld_delete_action_history(const lrmd_event_data_t *op) +{ + xmlNode *xml_top = NULL; + + CRM_CHECK(op != NULL, return); + + xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); + crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); + crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); + + if (op->interval_ms > 0) { + char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); + + /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ + crm_xml_add(xml_top, XML_ATTR_ID, op_id); + free(op_id); + } + + crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", + op->rsc_id, op->op_type, op->interval_ms, op->call_id); + + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, + XML_CIB_TAG_STATUS, xml_top, + cib_none); + + crm_log_xml_trace(xml_top, "op:cancel"); + free_xml(xml_top); +} + +/* Define xpath to find LRM resource history entry by node and resource */ +#define XPATH_HISTORY \ + "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ + "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ + "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ + "/" XML_LRM_TAG_RSC_OP + +/* ... and also by operation key */ +#define XPATH_HISTORY_ID XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s']" + +/* ... and also by operation key and operation call ID */ +#define XPATH_HISTORY_CALL XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" + +/* ... and also by operation key and original operation key */ +#define XPATH_HISTORY_ORIG XPATH_HISTORY \ + "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" + +/*! + * \internal + * \brief Delete a last_failure resource history entry from the CIB + * + * \param[in] rsc_id Name of resource to clear history for + * \param[in] node Name of node to clear history for + * \param[in] action If specified, delete only if this was failed action + * \param[in] interval_ms If \p action is specified, it has this interval + */ +void +controld_cib_delete_last_failure(const char *rsc_id, const char *node, + const char *action, guint interval_ms) +{ + char *xpath = NULL; + char *last_failure_key = NULL; + + CRM_CHECK((rsc_id != NULL) && (node != NULL), return); + + // Generate XPath to match desired entry + last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); + if (action == NULL) { + xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, + last_failure_key); + } else { + char *action_key = pcmk__op_key(rsc_id, action, interval_ms); + + xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, + last_failure_key, action_key); + free(action_key); + } + free(last_failure_key); + + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, + NULL, cib_xpath); + free(xpath); +} + +/*! + * \internal + * \brief Delete resource history entry from the CIB, given operation key + * + * \param[in] rsc_id Name of resource to clear history for + * \param[in] node Name of node to clear history for + * \param[in] key Operation key of operation to clear history for + * \param[in] call_id If specified, delete entry only if it has this call ID + */ +void +controld_delete_action_history_by_key(const char *rsc_id, const char *node, + const char *key, int call_id) +{ + char *xpath = NULL; + + CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); + + if (call_id > 0) { + xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, + call_id); + } else { + xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); + } + controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, + NULL, cib_xpath); + free(xpath); +} |