author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
commit     e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch)
tree       a6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/fenced/fenced_remote.c
parent     Initial commit. (diff)
download   pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz
           pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip
Adding upstream version 2.1.6. (tag: upstream/2.1.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/fenced/fenced_remote.c')
-rw-r--r--  daemons/fenced/fenced_remote.c  2509
1 file changed, 2509 insertions, 0 deletions
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c new file mode 100644 index 0000000..dc67947 --- /dev/null +++ b/daemons/fenced/fenced_remote.c @@ -0,0 +1,2509 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/utsname.h> + +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <ctype.h> +#include <regex.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/ipc.h> +#include <crm/common/ipc_internal.h> +#include <crm/cluster/internal.h> + +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> + +#include <crm/common/util.h> +#include <pacemaker-fenced.h> + +#define TIMEOUT_MULTIPLY_FACTOR 1.2 + +/* When one fencer queries its peers for devices able to handle a fencing + * request, each peer will reply with a list of such devices available to it. + * Each reply will be parsed into a peer_device_info_t, with each device's + * information kept in a device_properties_t. + */ + +typedef struct device_properties_s { + /* Whether access to this device has been verified */ + gboolean verified; + + /* The remaining members are indexed by the operation's "phase" */ + + /* Whether this device has been executed in each phase */ + gboolean executed[st_phase_max]; + /* Whether this device is disallowed from executing in each phase */ + gboolean disallowed[st_phase_max]; + /* Action-specific timeout for each phase */ + int custom_action_timeout[st_phase_max]; + /* Action-specific maximum random delay for each phase */ + int delay_max[st_phase_max]; + /* Action-specific base delay for each phase */ + int delay_base[st_phase_max]; + /* Group of enum st_device_flags */ + uint32_t device_support_flags; +} device_properties_t; + +typedef struct { + /* Name of peer that sent this result */ + char *host; + /* Only try peers for non-topology based operations once */ + gboolean tried; + /* Number of entries in the devices table */ + int ndevices; + /* Devices available to this host that are capable of fencing the target */ + GHashTable *devices; +} peer_device_info_t; + +GHashTable *stonith_remote_op_list = NULL; + +extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, + int call_options); + +static void request_peer_fencing(remote_fencing_op_t *op, + peer_device_info_t *peer); +static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); +static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); +static int get_op_total_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *chosen_peer); + +static gint +sort_strings(gconstpointer a, gconstpointer b) +{ + return strcmp(a, b); +} + +static void +free_remote_query(gpointer data) +{ + if (data != NULL) { + peer_device_info_t *peer = data; + + g_hash_table_destroy(peer->devices); + free(peer->host); + free(peer); + } +} + +void +free_stonith_remote_op_list(void) +{ + if (stonith_remote_op_list != NULL) { + g_hash_table_destroy(stonith_remote_op_list); + stonith_remote_op_list = NULL; + } +} + +struct peer_count_data { + const remote_fencing_op_t 
*op; + gboolean verified_only; + uint32_t support_action_only; + int count; +}; + +/*! + * \internal + * \brief Increment a counter if a device has not been executed yet + * + * \param[in] key Device ID (ignored) + * \param[in] value Device properties + * \param[in,out] user_data Peer count data + */ +static void +count_peer_device(gpointer key, gpointer value, gpointer user_data) +{ + device_properties_t *props = (device_properties_t*)value; + struct peer_count_data *data = user_data; + + if (!props->executed[data->op->phase] + && (!data->verified_only || props->verified) + && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { + ++(data->count); + } +} + +/*! + * \internal + * \brief Check the number of available devices in a peer's query results + * + * \param[in] op Operation that results are for + * \param[in] peer Peer to count + * \param[in] verified_only Whether to count only verified devices + * \param[in] support_action_only Whether to count only devices that support action + * + * \return Number of devices available to peer that were not already executed + */ +static int +count_peer_devices(const remote_fencing_op_t *op, + const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only) +{ + struct peer_count_data data; + + data.op = op; + data.verified_only = verified_only; + data.support_action_only = support_on_action_only; + data.count = 0; + if (peer) { + g_hash_table_foreach(peer->devices, count_peer_device, &data); + } + return data.count; +} + +/*! + * \internal + * \brief Search for a device in a query result + * + * \param[in] op Operation that result is for + * \param[in] peer Query result for a peer + * \param[in] device Device ID to search for + * + * \return Device properties if found, NULL otherwise + */ +static device_properties_t * +find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, + const char *device, uint32_t support_action_only) +{ + device_properties_t *props = g_hash_table_lookup(peer->devices, device); + + if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) { + return NULL; + } + return (props && !props->executed[op->phase] + && !props->disallowed[op->phase])? props : NULL; +} + +/*! 
+ * \internal + * \brief Find a device in a peer's device list and mark it as executed + * + * \param[in] op Operation that peer result is for + * \param[in,out] peer Peer with results to search + * \param[in] device ID of device to mark as done + * \param[in] verified_devices_only Only consider verified devices + * + * \return TRUE if device was found and marked, FALSE otherwise + */ +static gboolean +grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, + const char *device, gboolean verified_devices_only) +{ + device_properties_t *props = find_peer_device(op, peer, device, + fenced_support_flag(op->action)); + + if ((props == NULL) || (verified_devices_only && !props->verified)) { + return FALSE; + } + + crm_trace("Removing %s from %s (%d remaining)", + device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none)); + props->executed[op->phase] = TRUE; + return TRUE; +} + +static void +clear_remote_op_timers(remote_fencing_op_t * op) +{ + if (op->query_timer) { + g_source_remove(op->query_timer); + op->query_timer = 0; + } + if (op->op_timer_total) { + g_source_remove(op->op_timer_total); + op->op_timer_total = 0; + } + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); + op->op_timer_one = 0; + } +} + +static void +free_remote_op(gpointer data) +{ + remote_fencing_op_t *op = data; + + crm_log_xml_debug(op->request, "Destroying"); + + clear_remote_op_timers(op); + + free(op->id); + free(op->action); + free(op->delegate); + free(op->target); + free(op->client_id); + free(op->client_name); + free(op->originator); + + if (op->query_results) { + g_list_free_full(op->query_results, free_remote_query); + } + if (op->request) { + free_xml(op->request); + op->request = NULL; + } + if (op->devices_list) { + g_list_free_full(op->devices_list, free); + op->devices_list = NULL; + } + g_list_free_full(op->automatic_list, free); + g_list_free(op->duplicates); + + pcmk__reset_result(&op->result); + free(op); +} + +void +init_stonith_remote_op_hash_table(GHashTable **table) +{ + if (*table == NULL) { + *table = pcmk__strkey_table(NULL, free_remote_op); + } +} + +/*! + * \internal + * \brief Return an operation's originally requested action (before any remap) + * + * \param[in] op Operation to check + * + * \return Operation's original action + */ +static const char * +op_requested_action(const remote_fencing_op_t *op) +{ + return ((op->phase > st_phase_requested)? "reboot" : op->action); +} + +/*! + * \internal + * \brief Remap a "reboot" operation to the "off" phase + * + * \param[in,out] op Operation to remap + */ +static void +op_phase_off(remote_fencing_op_t *op) +{ + crm_info("Remapping multiple-device reboot targeting %s to 'off' " + CRM_XS " id=%.8s", op->target, op->id); + op->phase = st_phase_off; + + /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the + * memory allocation at each phase. + */ + strcpy(op->action, "off"); +} + +/*! + * \internal + * \brief Advance a remapped reboot operation to the "on" phase + * + * \param[in,out] op Operation to remap + */ +static void +op_phase_on(remote_fencing_op_t *op) +{ + GList *iter = NULL; + + crm_info("Remapped 'off' targeting %s complete, " + "remapping to 'on' for %s " CRM_XS " id=%.8s", + op->target, op->client_name, op->id); + op->phase = st_phase_on; + strcpy(op->action, "on"); + + /* Skip devices with automatic unfencing, because the cluster will handle it + * when the node rejoins. 
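+     * A minimal sketch (device names hypothetical; the fields are the ones
+     * used below): if this level held fence_ipmilan plus fence_scsi with
+     * automatic unfencing, only fence_ipmilan would be retried in the "on"
+     * phase:
+     *
+     *   before: op->devices_list   = [fence_ipmilan, fence_scsi]
+     *           op->automatic_list = [fence_scsi]
+     *   after:  op->devices_list   = [fence_ipmilan]
+     *           op->automatic_list = NULL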
+ */ + for (iter = op->automatic_list; iter != NULL; iter = iter->next) { + GList *match = g_list_find_custom(op->devices_list, iter->data, + sort_strings); + + if (match) { + op->devices_list = g_list_remove(op->devices_list, match->data); + } + } + g_list_free_full(op->automatic_list, free); + op->automatic_list = NULL; + + /* Rewind device list pointer */ + op->devices = op->devices_list; +} + +/*! + * \internal + * \brief Reset a remapped reboot operation + * + * \param[in,out] op Operation to reset + */ +static void +undo_op_remap(remote_fencing_op_t *op) +{ + if (op->phase > 0) { + crm_info("Undoing remap of reboot targeting %s for %s " + CRM_XS " id=%.8s", op->target, op->client_name, op->id); + op->phase = st_phase_requested; + strcpy(op->action, "reboot"); + } +} + +/*! + * \internal + * \brief Create notification data XML for a fencing operation result + * + * \param[in] op Fencer operation that completed + * + * \return Newly created XML to add as notification data + * \note The caller is responsible for freeing the result. + */ +static xmlNode * +fencing_result2xml(const remote_fencing_op_t *op) +{ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); + + crm_xml_add_int(notify_data, "state", op->state); + crm_xml_add(notify_data, F_STONITH_TARGET, op->target); + crm_xml_add(notify_data, F_STONITH_ACTION, op->action); + crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); + crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); + crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); + crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); + + return notify_data; +} + +/*! + * \internal + * \brief Broadcast a fence result notification to all CPG peers + * + * \param[in] op Fencer operation that completed + * \param[in] op_merged Whether this operation is a duplicate of another + */ +void +fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) +{ + static int count = 0; + xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); + xmlNode *notify_data = fencing_result2xml(op); + + count++; + crm_trace("Broadcasting result to peers"); + crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); + crm_xml_add(bcast, F_SUBTYPE, "broadcast"); + crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); + crm_xml_add_int(bcast, "count", count); + + if (op_merged) { + pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true); + } + + stonith__xe_set_result(notify_data, &op->result); + + add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); + send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); + free_xml(notify_data); + free_xml(bcast); + + return; +} + +/*! 
+ * \internal
+ * \brief Reply to a local request originator and notify all subscribed clients
+ *
+ * \param[in,out] op    Fencer operation that completed
+ * \param[in,out] data  Top-level XML to add notification to
+ */
+static void
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
+{
+    xmlNode *notify_data = NULL;
+    xmlNode *reply = NULL;
+    pcmk__client_t *client = NULL;
+
+    if (op->notify_sent == TRUE) {
+        /* nothing to do */
+        return;
+    }
+
+    /* Do notification with a clean data object */
+    crm_xml_add_int(data, "state", op->state);
+    crm_xml_add(data, F_STONITH_TARGET, op->target);
+    crm_xml_add(data, F_STONITH_OPERATION, op->action);
+
+    reply = fenced_construct_reply(op->request, data, &op->result);
+    crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+
+    /* Send fencing OP reply to local client that initiated fencing */
+    client = pcmk__find_client_by_id(op->client_id);
+    if (client == NULL) {
+        crm_trace("Skipping reply to %s: no longer a client", op->client_id);
+    } else {
+        do_local_reply(reply, client, op->call_options);
+    }
+
+    /* Broadcast to all local clients that the fencing operation happened */
+    notify_data = fencing_result2xml(op);
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
+    free_xml(notify_data);
+    fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+
+    /* Mark this op as having its notifications already sent */
+    op->notify_sent = TRUE;
+    free_xml(reply);
+}
+
+/*!
+ * \internal
+ * \brief Finalize all duplicates of a given fencer operation
+ *
+ * \param[in,out] op    Fencer operation that completed
+ * \param[in,out] data  Top-level XML to add notification to
+ */
+static void
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
+{
+    for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
+        remote_fencing_op_t *other = iter->data;
+
+        if (other->state == st_duplicate) {
+            other->state = op->state;
+            crm_debug("Performing duplicate notification for %s@%s: %s "
+                      CRM_XS " id=%.8s",
+                      other->client_name, other->originator,
+                      pcmk_exec_status_str(op->result.execution_status),
+                      other->id);
+            pcmk__copy_result(&op->result, &other->result);
+            finalize_op(other, data, true);
+
+        } else {
+            // Possible if (for example) it timed out already
+            crm_err("Skipping duplicate notification for %s@%s "
+                    CRM_XS " state=%s id=%.8s",
+                    other->client_name, other->originator,
+                    stonith_op_state_str(other->state), other->id);
+        }
+    }
+}
+
+static char *
+delegate_from_xml(xmlNode *xml)
+{
+    xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
+
+    if (match == NULL) {
+        return crm_element_value_copy(xml, F_ORIG);
+    } else {
+        return crm_element_value_copy(match, F_STONITH_DELEGATE);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Finalize a peer fencing operation
+ *
+ * Clean up after a fencing operation completes. This function has two code
+ * paths: the executioner uses it to broadcast the result to CPG peers, and
+ * then each peer (including the executioner) uses it to process that
+ * broadcast and notify its IPC clients of the result.
+ *
+ * \param[in,out] op    Fencer operation that completed
+ * \param[in,out] data  If not NULL, XML reply of last delegated operation
+ * \param[in]     dup   Whether this operation is a duplicate of another
+ *                      (in which case, do not broadcast the result)
+ *
+ * \note The operation result should be set before calling this function.
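+ *
+ * A minimal sketch of the two paths (illustrative only; the calls shown
+ * exist in this file):
+ *
+ *   // executioner, after the delegated action succeeds:
+ *   pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ *   finalize_op(op, reply, false);  // broadcasts the result to CPG peers
+ *
+ *   // every peer (including the executioner), on receiving the broadcast:
+ *   finalize_op(op, msg, false);    // now notifies local IPC clients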
+ */ +static void +finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) +{ + int level = LOG_ERR; + const char *subt = NULL; + xmlNode *local_data = NULL; + gboolean op_merged = FALSE; + + CRM_CHECK((op != NULL), return); + + // This is a no-op if timers have already been cleared + clear_remote_op_timers(op); + + if (op->notify_sent) { + // Most likely, this is a timed-out action that eventually completed + crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " + "Result arrived too late " CRM_XS " id=%.8s", + op->action, (op->target? " targeting " : ""), + (op->target? op->target : ""), + (op->delegate? op->delegate : "unknown node"), + op->client_name, op->originator, + (op_merged? " (merged)" : ""), + op->id); + return; + } + + set_fencing_completed(op); + undo_op_remap(op); + + if (data == NULL) { + data = create_xml_node(NULL, "remote-op"); + local_data = data; + + } else if (op->delegate == NULL) { + switch (op->result.execution_status) { + case PCMK_EXEC_NO_FENCE_DEVICE: + break; + + case PCMK_EXEC_INVALID: + if (op->result.exit_status != CRM_EX_EXPIRED) { + op->delegate = delegate_from_xml(data); + } + break; + + default: + op->delegate = delegate_from_xml(data); + break; + } + } + + if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) { + op_merged = true; + } + + /* Tell everyone the operation is done, we will continue + * with doing the local notifications once we receive + * the broadcast back. */ + subt = crm_element_value(data, F_SUBTYPE); + if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) { + /* Defer notification until the bcast message arrives */ + fenced_broadcast_op_result(op, op_merged); + free_xml(local_data); + return; + } + + if (pcmk__result_ok(&op->result) || dup + || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + level = LOG_NOTICE; + } + do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " + CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), + (op->target? op->target : ""), + (op->delegate? op->delegate : "unknown node"), + op->client_name, op->originator, + (op_merged? " (merged)" : ""), + crm_exit_str(op->result.exit_status), + pcmk_exec_status_str(op->result.execution_status), + ((op->result.exit_reason == NULL)? "" : ": "), + ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), + op->id); + + handle_local_reply_and_notify(op, data); + + if (!dup) { + finalize_op_duplicates(op, data); + } + + /* Free non-essential parts of the record + * Keep the record around so we can query the history + */ + if (op->query_results) { + g_list_free_full(op->query_results, free_remote_query); + op->query_results = NULL; + } + if (op->request) { + free_xml(op->request); + op->request = NULL; + } + + free_xml(local_data); +} + +/*! 
+ * \internal + * \brief Finalize a watchdog fencer op after the waiting time expires + * + * \param[in,out] userdata Fencer operation that completed + * + * \return G_SOURCE_REMOVE (which tells glib not to restart timer) + */ +static gboolean +remote_op_watchdog_done(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_one = 0; + + crm_notice("Self-fencing (%s) by %s for %s assumed complete " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + op->state = st_done; + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + finalize_op(op, NULL, false); + return G_SOURCE_REMOVE; +} + +static gboolean +remote_op_timeout_one(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_one = 0; + + crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS + " id=%.8s", op->action, op->target, op->client_name, op->id); + pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, + "Peer did not return fence result within timeout"); + + // The requested delay has been applied for the first device + if (op->delay > 0) { + op->delay = 0; + crm_trace("Try another device for '%s' action targeting %s " + "for client %s without delay " CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + } + + // Try another device, if appropriate + request_peer_fencing(op, NULL); + return G_SOURCE_REMOVE; +} + +/*! + * \internal + * \brief Finalize a remote fencer operation that timed out + * + * \param[in,out] op Fencer operation that timed out + * \param[in] reason Readable description of what step timed out + */ +static void +finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) +{ + crm_debug("Action '%s' targeting %s for client %s timed out " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + + if (op->phase == st_phase_on) { + /* A remapped reboot operation timed out in the "on" phase, but the + * "off" phase completed successfully, so quit trying any further + * devices, and return success. + */ + op->state = st_done; + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + op->state = st_failed; + pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); + } + finalize_op(op, NULL, false); +} + +/*! 
+ * \internal + * \brief Finalize a remote fencer operation that timed out + * + * \param[in,out] userdata Fencer operation that timed out + * + * \return G_SOURCE_REMOVE (which tells glib not to restart timer) + */ +static gboolean +remote_op_timeout(gpointer userdata) +{ + remote_fencing_op_t *op = userdata; + + op->op_timer_total = 0; + + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); + } else { + finalize_timed_out_op(userdata, "Fencing did not complete within a " + "total timeout based on the " + "configured timeout and retries for " + "any devices attempted"); + } + return G_SOURCE_REMOVE; +} + +static gboolean +remote_op_query_timeout(gpointer data) +{ + remote_fencing_op_t *op = data; + + op->query_timer = 0; + + if (op->state == st_done) { + crm_debug("Operation %.8s targeting %s already completed", + op->id, op->target); + } else if (op->state == st_exec) { + crm_debug("Operation %.8s targeting %s already in progress", + op->id, op->target); + } else if (op->query_results) { + // Query succeeded, so attempt the actual fencing + crm_debug("Query %.8s targeting %s complete (state=%s)", + op->id, op->target, stonith_op_state_str(op->state)); + request_peer_fencing(op, NULL); + } else { + crm_debug("Query %.8s targeting %s timed out (state=%s)", + op->id, op->target, stonith_op_state_str(op->state)); + finalize_timed_out_op(op, "No capable peers replied to device query " + "within timeout"); + } + + return G_SOURCE_REMOVE; +} + +static gboolean +topology_is_empty(stonith_topology_t *tp) +{ + int i; + + if (tp == NULL) { + return TRUE; + } + + for (i = 0; i < ST_LEVEL_MAX; i++) { + if (tp->levels[i] != NULL) { + return FALSE; + } + } + return TRUE; +} + +/*! + * \internal + * \brief Add a device to an operation's automatic unfencing list + * + * \param[in,out] op Operation to modify + * \param[in] device Device ID to add + */ +static void +add_required_device(remote_fencing_op_t *op, const char *device) +{ + GList *match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (!match) { + op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); + } +} + +/*! + * \internal + * \brief Remove a device from the automatic unfencing list + * + * \param[in,out] op Operation to modify + * \param[in] device Device ID to remove + */ +static void +remove_required_device(remote_fencing_op_t *op, const char *device) +{ + GList *match = g_list_find_custom(op->automatic_list, device, + sort_strings); + + if (match) { + op->automatic_list = g_list_remove(op->automatic_list, match->data); + } +} + +/* deep copy the device list */ +static void +set_op_device_list(remote_fencing_op_t * op, GList *devices) +{ + GList *lpc = NULL; + + if (op->devices_list) { + g_list_free_full(op->devices_list, free); + op->devices_list = NULL; + } + for (lpc = devices; lpc != NULL; lpc = lpc->next) { + op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); + } + op->devices = op->devices_list; +} + +/*! 
+ * \internal + * \brief Check whether a node matches a topology target + * + * \param[in] tp Topology table entry to check + * \param[in] node Name of node to check + * + * \return TRUE if node matches topology target + */ +static gboolean +topology_matches(const stonith_topology_t *tp, const char *node) +{ + regex_t r_patt; + + CRM_CHECK(node && tp && tp->target, return FALSE); + switch (tp->kind) { + case fenced_target_by_attribute: + /* This level targets by attribute, so tp->target is a NAME=VALUE pair + * of a permanent attribute applied to targeted nodes. The test below + * relies on the locally cached copy of the CIB, so if fencing needs to + * be done before the initial CIB is received or after a malformed CIB + * is received, then the topology will be unable to be used. + */ + if (node_has_attr(node, tp->target_attribute, tp->target_value)) { + crm_notice("Matched %s with %s by attribute", node, tp->target); + return TRUE; + } + break; + + case fenced_target_by_pattern: + /* This level targets node names matching a pattern, so tp->target + * (and tp->target_pattern) is a regular expression. + */ + if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { + crm_info("Bad regex '%s' for fencing level", tp->target); + } else { + int status = regexec(&r_patt, node, 0, NULL, 0); + + regfree(&r_patt); + if (status == 0) { + crm_notice("Matched %s with %s by name", node, tp->target); + return TRUE; + } + } + break; + + case fenced_target_by_name: + crm_trace("Testing %s against %s", node, tp->target); + return pcmk__str_eq(tp->target, node, pcmk__str_casei); + + default: + break; + } + crm_trace("No match for %s with %s", node, tp->target); + return FALSE; +} + +stonith_topology_t * +find_topology_for_host(const char *host) +{ + GHashTableIter tIter; + stonith_topology_t *tp = g_hash_table_lookup(topology, host); + + if(tp != NULL) { + crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); + return tp; + } + + g_hash_table_iter_init(&tIter, topology); + while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { + if (topology_matches(tp, host)) { + crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); + return tp; + } + } + + crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); + return NULL; +} + +/*! + * \internal + * \brief Set fencing operation's device list to target's next topology level + * + * \param[in,out] op Remote fencing operation to modify + * \param[in] empty_ok If true, an operation without a target (i.e. + * queries) or a target without a topology will get a + * pcmk_rc_ok return value instead of ENODEV + * + * \return Standard Pacemaker return value + */ +static int +advance_topology_level(remote_fencing_op_t *op, bool empty_ok) +{ + stonith_topology_t *tp = NULL; + + if (op->target) { + tp = find_topology_for_host(op->target); + } + if (topology_is_empty(tp)) { + return empty_ok? 
pcmk_rc_ok : ENODEV; + } + + CRM_ASSERT(tp->levels != NULL); + + stonith__set_call_options(op->call_options, op->id, st_opt_topology); + + /* This is a new level, so undo any remapping left over from previous */ + undo_op_remap(op); + + do { + op->level++; + + } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); + + if (op->level < ST_LEVEL_MAX) { + crm_trace("Attempting fencing level %d targeting %s (%d devices) " + "for client %s@%s (id=%.8s)", + op->level, op->target, g_list_length(tp->levels[op->level]), + op->client_name, op->originator, op->id); + set_op_device_list(op, tp->levels[op->level]); + + // The requested delay has been applied for the first fencing level + if (op->level > 1 && op->delay > 0) { + op->delay = 0; + } + + if ((g_list_next(op->devices_list) != NULL) + && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) { + /* A reboot has been requested for a topology level with multiple + * devices. Instead of rebooting the devices sequentially, we will + * turn them all off, then turn them all on again. (Think about + * switched power outlets for redundant power supplies.) + */ + op_phase_off(op); + } + return pcmk_rc_ok; + } + + crm_info("All %sfencing options targeting %s for client %s@%s failed " + CRM_XS " id=%.8s", + (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", + op->target, op->client_name, op->originator, op->id); + return ENODEV; +} + +/*! + * \internal + * \brief If fencing operation is a duplicate, merge it into the other one + * + * \param[in,out] op Fencing operation to check + */ +static void +merge_duplicates(remote_fencing_op_t *op) +{ + GHashTableIter iter; + remote_fencing_op_t *other = NULL; + + time_t now = time(NULL); + + g_hash_table_iter_init(&iter, stonith_remote_op_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { + const char *other_action = op_requested_action(other); + + if (!strcmp(op->id, other->id)) { + continue; // Don't compare against self + } + if (other->state > st_exec) { + crm_trace("%.8s not duplicate of %.8s: not in progress", + op->id, other->id); + continue; + } + if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", + op->id, other->id, op->target, other->target); + continue; + } + if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { + crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", + op->id, other->id, op->action, other_action); + continue; + } + if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: same client %s", + op->id, other->id, op->client_name); + continue; + } + if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { + crm_trace("%.8s not duplicate of %.8s: suicide for %s", + op->id, other->id, other->target); + continue; + } + if (!fencing_peer_active(crm_get_peer(0, other->originator))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, + other->originator, other->id); + crm_trace("%.8s not duplicate of %.8s: originator dead", + op->id, other->id); + other->state = st_failed; + continue; + } + if ((other->total_timeout > 0) + && (now > (other->total_timeout + other->created))) { + crm_trace("%.8s not duplicate of %.8s: old (%ld vs. 
%ld + %d)", + op->id, other->id, now, other->created, + other->total_timeout); + continue; + } + + /* There is another in-flight request to fence the same host + * Piggyback on that instead. If it fails, so do we. + */ + other->duplicates = g_list_append(other->duplicates, op); + if (other->total_timeout == 0) { + other->total_timeout = op->total_timeout = + TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); + crm_trace("Best guess as to timeout used for %.8s: %d", + other->id, other->total_timeout); + } + crm_notice("Merging fencing action '%s' targeting %s originating from " + "client %s with identical request from %s@%s " + CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds", + op->action, op->target, op->client_name, + other->client_name, other->originator, + op->id, other->id, other->total_timeout); + report_timeout_period(op, other->total_timeout); + op->state = st_duplicate; + } +} + +static uint32_t fencing_active_peers(void) +{ + uint32_t count = 0; + crm_node_t *entry; + GHashTableIter gIter; + + g_hash_table_iter_init(&gIter, crm_peer_cache); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + if(fencing_peer_active(entry)) { + count++; + } + } + return count; +} + +/*! + * \internal + * \brief Process a manual confirmation of a pending fence action + * + * \param[in] client IPC client that sent confirmation + * \param[in,out] msg Request XML with manual confirmation + * + * \return Standard Pacemaker return code + */ +int +fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) +{ + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); + + CRM_CHECK(dev != NULL, return EPROTO); + + crm_notice("Received manual confirmation that %s has been fenced", + pcmk__s(crm_element_value(dev, F_STONITH_TARGET), + "unknown target")); + op = initiate_remote_stonith_op(client, msg, TRUE); + if (op == NULL) { + return EPROTO; + } + op->state = st_done; + set_fencing_completed(op); + op->delegate = strdup("a human"); + + // For the fencer's purposes, the fencing operation is done + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + finalize_op(op, msg, false); + + /* For the requester's purposes, the operation is still pending. The + * actual result will be sent asynchronously via the operation's done_cb(). + */ + return EINPROGRESS; +} + +/*! + * \internal + * \brief Create a new remote stonith operation + * + * \param[in] client ID of local stonith client that initiated the operation + * \param[in] request The request from the client that started the operation + * \param[in] peer TRUE if this operation is owned by another stonith peer + * (an operation owned by one peer is stored on all peers, + * but only the owner executes it; all nodes get the results + * once the owner finishes execution) + */ +void * +create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) +{ + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER); + int call_options = 0; + const char *operation = NULL; + + init_stonith_remote_op_hash_table(&stonith_remote_op_list); + + /* If this operation is owned by another node, check to make + * sure we haven't already created this operation. 
*/ + if (peer && dev) { + const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); + + CRM_CHECK(op_id != NULL, return NULL); + + op = g_hash_table_lookup(stonith_remote_op_list, op_id); + if (op) { + crm_debug("Reusing existing remote fencing op %.8s for %s", + op_id, ((client == NULL)? "unknown client" : client)); + return op; + } + } + + op = calloc(1, sizeof(remote_fencing_op_t)); + CRM_ASSERT(op != NULL); + + crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); + // Value -1 means disable any static/random fencing delays + crm_element_value_int(request, F_STONITH_DELAY, &(op->delay)); + + if (peer && dev) { + op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); + } else { + op->id = crm_generate_uuid(); + } + + g_hash_table_replace(stonith_remote_op_list, op->id, op); + + op->state = st_query; + op->replies_expected = fencing_active_peers(); + op->action = crm_element_value_copy(dev, F_STONITH_ACTION); + op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); + op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ + op->created = time(NULL); + + if (op->originator == NULL) { + /* Local or relayed request */ + op->originator = strdup(stonith_our_uname); + } + + CRM_LOG_ASSERT(client != NULL); + if (client) { + op->client_id = strdup(client); + } + + + /* For a RELAY operation, set fenced on the client. */ + operation = crm_element_value(request, F_STONITH_OPERATION); + + if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { + op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, + (unsigned long) getpid()); + } else { + op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); + } + + op->target = crm_element_value_copy(dev, F_STONITH_TARGET); + op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + op->call_options = call_options; + + crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); + + crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " + "base timeout %d, %u %s expected)", + (peer && dev)? "Recorded" : "Generated", op->id, op->action, + op->target, op->client_name, op->base_timeout, + op->replies_expected, + pcmk__plural_alt(op->replies_expected, "reply", "replies")); + + if (op->call_options & st_opt_cs_nodeid) { + int nodeid; + crm_node_t *node; + + pcmk__scan_min_int(op->target, &nodeid, 0); + node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); + + /* Ensure the conversion only happens once */ + stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); + + if (node && node->uname) { + free(op->target); + op->target = strdup(node->uname); + + } else { + crm_warn("Could not expand nodeid '%s' into a host name", op->target); + } + } + + /* check to see if this is a duplicate operation of another in-flight operation */ + merge_duplicates(op); + + if (op->state != st_duplicate) { + /* kick history readers */ + fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); + } + + /* safe to trim as long as that doesn't touch pending ops */ + stonith_fence_history_trim(); + + return op; +} + +/*! 
+ * \internal + * \brief Create a peer fencing operation from a request, and initiate it + * + * \param[in] client IPC client that made request (NULL to get from request) + * \param[in] request Request XML + * \param[in] manual_ack Whether this is a manual action confirmation + * + * \return Newly created operation on success, otherwise NULL + */ +remote_fencing_op_t * +initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, + gboolean manual_ack) +{ + int query_timeout = 0; + xmlNode *query = NULL; + const char *client_id = NULL; + remote_fencing_op_t *op = NULL; + const char *relay_op_id = NULL; + const char *operation = NULL; + + if (client) { + client_id = client->id; + } else { + client_id = crm_element_value(request, F_STONITH_CLIENTID); + } + + CRM_LOG_ASSERT(client_id != NULL); + op = create_remote_stonith_op(client_id, request, FALSE); + op->owner = TRUE; + if (manual_ack) { + return op; + } + + CRM_CHECK(op->action, return NULL); + + if (advance_topology_level(op, true) != pcmk_rc_ok) { + op->state = st_failed; + } + + switch (op->state) { + case st_failed: + // advance_topology_level() exhausted levels + pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + "All topology levels failed"); + crm_warn("Could not request peer fencing (%s) targeting %s " + CRM_XS " id=%.8s", op->action, op->target, op->id); + finalize_op(op, NULL, false); + return op; + + case st_duplicate: + crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " + CRM_XS " id=%.8s", op->action, op->target, op->id); + return op; + + default: + crm_notice("Requesting peer fencing (%s) targeting %s " + CRM_XS " id=%.8s state=%s base_timeout=%d", + op->action, op->target, op->id, + stonith_op_state_str(op->state), op->base_timeout); + } + + query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, + NULL, op->call_options); + + crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(query, F_STONITH_TARGET, op->target); + crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); + crm_xml_add(query, F_STONITH_ORIGIN, op->originator); + crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); + crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); + crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); + + /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ + operation = crm_element_value(request, F_STONITH_OPERATION); + if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { + relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); + if (relay_op_id) { + crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id); + } + } + + send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); + free_xml(query); + + query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; + op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); + + return op; +} + +enum find_best_peer_options { + /*! Skip checking the target peer for capable fencing devices */ + FIND_PEER_SKIP_TARGET = 0x0001, + /*! Only check the target peer for capable fencing devices */ + FIND_PEER_TARGET_ONLY = 0x0002, + /*! Skip peers and devices that are not verified */ + FIND_PEER_VERIFIED_ONLY = 0x0004, +}; + +static peer_device_info_t * +find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) +{ + GList *iter = NULL; + gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? 
TRUE : FALSE; + + if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { + return NULL; + } + + for (iter = op->query_results; iter != NULL; iter = iter->next) { + peer_device_info_t *peer = iter->data; + + crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", + peer->host, op->target, peer->ndevices, + pcmk__plural_s(peer->ndevices), peer->tried, options); + if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { + + if (grab_peer_device(op, peer, device, verified_devices_only)) { + return peer; + } + + } else if (!peer->tried + && count_peer_devices(op, peer, verified_devices_only, + fenced_support_flag(op->action))) { + /* No topology: Use the current best peer */ + crm_trace("Simple fencing"); + return peer; + } + } + + return NULL; +} + +static peer_device_info_t * +stonith_choose_peer(remote_fencing_op_t * op) +{ + const char *device = NULL; + peer_device_info_t *peer = NULL; + uint32_t active = fencing_active_peers(); + + do { + if (op->devices) { + device = op->devices->data; + crm_trace("Checking for someone to fence (%s) %s using %s", + op->action, op->target, device); + } else { + crm_trace("Checking for someone to fence (%s) %s", + op->action, op->target); + } + + /* Best choice is a peer other than the target with verified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); + if (peer) { + crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>"); + return peer; + } + + if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { + crm_trace("Waiting before looking for unverified devices to fence %s", op->target); + return NULL; + } + + /* If no other peer has verified access, next best is unverified access */ + peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); + if (peer) { + crm_trace("Found best unverified peer %s", peer->host); + return peer; + } + + /* If no other peer can do it, last option is self-fencing + * (which is never allowed for the "on" phase of a remapped reboot) + */ + if (op->phase != st_phase_on) { + peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); + if (peer) { + crm_trace("%s will fence itself", peer->host); + return peer; + } + } + + /* Try the next fencing level if there is one (unless we're in the "on" + * phase of a remapped "reboot", because we ignore errors in that case) + */ + } while ((op->phase != st_phase_on) + && pcmk_is_set(op->call_options, st_opt_topology) + && (advance_topology_level(op, false) == pcmk_rc_ok)); + + if ((stonith_watchdog_timeout_ms > 0) + && pcmk__is_fencing_action(op->action) + && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) + && node_does_watchdog_fencing(op->target)) { + crm_info("Couldn't contact watchdog-fencing target-node (%s)", + op->target); + /* check_watchdog_fencing_and_wait will log additional info */ + } else { + crm_notice("Couldn't find anyone to fence (%s) %s using %s", + op->action, op->target, (device? 
device : "any device")); + } + return NULL; +} + +static int +get_device_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *peer, const char *device, + bool with_delay) +{ + device_properties_t *props; + int delay = 0; + + if (!peer || !device) { + return op->base_timeout; + } + + props = g_hash_table_lookup(peer->devices, device); + if (!props) { + return op->base_timeout; + } + + // op->delay < 0 means disable any static/random fencing delays + if (with_delay && op->delay >= 0) { + // delay_base is eventually limited by delay_max + delay = (props->delay_max[op->phase] > 0 ? + props->delay_max[op->phase] : props->delay_base[op->phase]); + } + + return (props->custom_action_timeout[op->phase]? + props->custom_action_timeout[op->phase] : op->base_timeout) + + delay; +} + +struct timeout_data { + const remote_fencing_op_t *op; + const peer_device_info_t *peer; + int total_timeout; +}; + +/*! + * \internal + * \brief Add timeout to a total if device has not been executed yet + * + * \param[in] key GHashTable key (device ID) + * \param[in] value GHashTable value (device properties) + * \param[in,out] user_data Timeout data + */ +static void +add_device_timeout(gpointer key, gpointer value, gpointer user_data) +{ + const char *device_id = key; + device_properties_t *props = value; + struct timeout_data *timeout = user_data; + + if (!props->executed[timeout->op->phase] + && !props->disallowed[timeout->op->phase]) { + timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, + device_id, true); + } +} + +static int +get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) +{ + struct timeout_data timeout; + + timeout.op = op; + timeout.peer = peer; + timeout.total_timeout = 0; + + g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); + + return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); +} + +static int +get_op_total_timeout(const remote_fencing_op_t *op, + const peer_device_info_t *chosen_peer) +{ + int total_timeout = 0; + stonith_topology_t *tp = find_topology_for_host(op->target); + + if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { + int i; + GList *device_list = NULL; + GList *iter = NULL; + GList *auto_list = NULL; + + if (pcmk__str_eq(op->action, "on", pcmk__str_none) + && (op->automatic_list != NULL)) { + auto_list = g_list_copy(op->automatic_list); + } + + /* Yep, this looks scary, nested loops all over the place. + * Here is what is going on. + * Loop1: Iterate through fencing levels. + * Loop2: If a fencing level has devices, loop through each device + * Loop3: For each device in a fencing level, see what peer owns it + * and what that peer has reported the timeout is for the device. 
+ */
+    for (i = 0; i < ST_LEVEL_MAX; i++) {
+        if (!tp->levels[i]) {
+            continue;
+        }
+        for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
+            /* In case of a watchdog device, add the timeout to the budget
+             * regardless of whether we got a reply.
+             */
+            if ((stonith_watchdog_timeout_ms > 0)
+                && pcmk__is_fencing_action(op->action)
+                && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
+                                pcmk__str_none)
+                && node_does_watchdog_fencing(op->target)) {
+                total_timeout += stonith_watchdog_timeout_ms / 1000;
+                continue;
+            }
+
+            for (iter = op->query_results; iter != NULL; iter = iter->next) {
+                const peer_device_info_t *peer = iter->data;
+
+                if (auto_list) {
+                    GList *match = g_list_find_custom(auto_list, device_list->data,
+                                                      sort_strings);
+                    if (match) {
+                        auto_list = g_list_remove(auto_list, match->data);
+                    }
+                }
+
+                if (find_peer_device(op, peer, device_list->data,
+                                     fenced_support_flag(op->action))) {
+                    total_timeout += get_device_timeout(op, peer,
+                                                        device_list->data,
+                                                        true);
+                    break;
+                }
+            } /* End Loop3: match device with peer that owns device, find device's timeout period */
+        } /* End Loop2: iterate through devices at a specific level */
+    } /* End Loop1: iterate through fencing levels */
+
+    // Add timeouts for any devices remaining on the automatic unfencing list
+    if (auto_list) {
+        for (iter = auto_list; iter != NULL; iter = iter->next) {
+            GList *iter2 = NULL;
+
+            for (iter2 = op->query_results; iter2 != NULL; iter2 = iter2->next) {
+                peer_device_info_t *peer = iter2->data;
+                if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
+                    total_timeout += get_device_timeout(op, peer,
+                                                        iter->data, true);
+                    break;
+                }
+            }
+        }
+    }
+
+    g_list_free(auto_list);
+
+    } else if (chosen_peer) {
+        total_timeout = get_peer_timeout(op, chosen_peer);
+    } else {
+        total_timeout = op->base_timeout;
+    }
+
+    /* Take any requested fencing delay into account to prevent it from eating
+     * up the total timeout.
+     */
+    return ((total_timeout ? total_timeout : op->base_timeout)
+            + (op->delay > 0 ? op->delay : 0));
+}
+
+static void
+report_timeout_period(remote_fencing_op_t * op, int op_timeout)
+{
+    GList *iter = NULL;
+    xmlNode *update = NULL;
+    const char *client_node = NULL;
+    const char *client_id = NULL;
+    const char *call_id = NULL;
+
+    if (op->call_options & st_opt_sync_call) {
+        /* There is no reason to report the timeout for a synchronous call. It
+         * is impossible to use the reported timeout to do anything when the client
+         * is blocking for the response. This update is only important for
+         * async calls that require a callback to report the results in.
*/ + return; + } else if (!op->request) { + return; + } + + crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); + client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); + call_id = crm_element_value(op->request, F_STONITH_CALLID); + client_id = crm_element_value(op->request, F_STONITH_CLIENTID); + if (!client_node || !call_id || !client_id) { + return; + } + + if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { + // Client is connected to this node, so send update directly to them + do_stonith_async_timeout_update(client_id, call_id, op_timeout); + return; + } + + /* The client is connected to another node, relay this update to them */ + update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); + crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(update, F_STONITH_CLIENTID, client_id); + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + + send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); + + free_xml(update); + + for (iter = op->duplicates; iter != NULL; iter = iter->next) { + remote_fencing_op_t *dup = iter->data; + + crm_trace("Reporting timeout for duplicate %.8s to client %s", + dup->id, dup->client_name); + report_timeout_period(iter->data, op_timeout); + } +} + +/*! + * \internal + * \brief Advance an operation to the next device in its topology + * + * \param[in,out] op Fencer operation to advance + * \param[in] device ID of device that just completed + * \param[in,out] msg If not NULL, XML reply of last delegated operation + */ +static void +advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + xmlNode *msg) +{ + /* Advance to the next device at this topology level, if any */ + if (op->devices) { + op->devices = op->devices->next; + } + + /* Handle automatic unfencing if an "on" action was requested */ + if ((op->phase == st_phase_requested) + && pcmk__str_eq(op->action, "on", pcmk__str_none)) { + /* If the device we just executed was required, it's not anymore */ + remove_required_device(op, device); + + /* If there are no more devices at this topology level, run through any + * remaining devices with automatic unfencing + */ + if (op->devices == NULL) { + op->devices = op->automatic_list; + } + } + + if ((op->devices == NULL) && (op->phase == st_phase_off)) { + /* We're done with this level and with required devices, but we had + * remapped "reboot" to "off", so start over with "on". If any devices + * need to be turned back on, op->devices will be non-NULL after this. 
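+     *
+     * Illustrative sequence for a remapped reboot (names as in this file):
+     *   st_phase_requested ("reboot")
+     *     -> op_phase_off(): op->action becomes "off"; every device in the
+     *        level is executed
+     *     -> op_phase_on(): op->action becomes "on"; only devices without
+     *        automatic unfencing are executed again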
+ */ + op_phase_on(op); + } + + // This function is only called if the previous device succeeded + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + if (op->devices) { + /* Necessary devices remain, so execute the next one */ + crm_trace("Next targeting %s on behalf of %s@%s", + op->target, op->client_name, op->originator); + + // The requested delay has been applied for the first device + if (op->delay > 0) { + op->delay = 0; + } + + request_peer_fencing(op, NULL); + } else { + /* We're done with all devices and phases, so finalize operation */ + crm_trace("Marking complex fencing op targeting %s as complete", + op->target); + op->state = st_done; + finalize_op(op, msg, false); + } +} + +static gboolean +check_watchdog_fencing_and_wait(remote_fencing_op_t * op) +{ + if (node_does_watchdog_fencing(op->target)) { + + crm_notice("Waiting %lds for %s to self-fence (%s) for " + "client %s " CRM_XS " id=%.8s", + (stonith_watchdog_timeout_ms / 1000), + op->target, op->action, op->client_name, op->id); + + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); + } + op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, + remote_op_watchdog_done, op); + return TRUE; + } else { + crm_debug("Skipping fallback to watchdog-fencing as %s is " + "not in host-list", op->target); + } + return FALSE; +} + +/*! + * \internal + * \brief Ask a peer to execute a fencing operation + * + * \param[in,out] op Fencing operation to be executed + * \param[in,out] peer If NULL or topology is in use, choose best peer to + * execute the fencing, otherwise use this peer + */ +static void +request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) +{ + const char *device = NULL; + int timeout; + + CRM_CHECK(op != NULL, return); + + crm_trace("Action %.8s targeting %s for %s is %s", + op->id, op->target, op->client_name, + stonith_op_state_str(op->state)); + + if ((op->phase == st_phase_on) && (op->devices != NULL)) { + /* We are in the "on" phase of a remapped topology reboot. If this + * device has pcmk_reboot_action="off", or doesn't support the "on" + * action, skip it. + * + * We can't check device properties at this point because we haven't + * chosen a peer for this stage yet. Instead, we check the local node's + * knowledge about the device. If different versions of the fence agent + * are installed on different nodes, there's a chance this could be + * mistaken, but the worst that could happen is we don't try turning the + * node back on when we should. 
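+     *
+     * For example (hypothetical configuration), a switched outlet defined
+     * with pcmk_reboot_action="off" is meant to stay off after a reboot
+     * request, so the checks below skip it and advance to the next device
+     * in the level.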
+ */ + device = op->devices->data; + if (pcmk__str_eq(fenced_device_reboot_action(device), "off", + pcmk__str_none)) { + crm_info("Not turning %s back on using %s because the device is " + "configured to stay off (pcmk_reboot_action='off')", + op->target, device); + advance_topology_device_in_level(op, device, NULL); + return; + } + if (!fenced_device_supports_on(device)) { + crm_info("Not turning %s back on using %s because the agent " + "doesn't support 'on'", op->target, device); + advance_topology_device_in_level(op, device, NULL); + return; + } + } + + timeout = op->base_timeout; + if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { + peer = stonith_choose_peer(op); + } + + if (!op->op_timer_total) { + op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); + op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); + report_timeout_period(op, op->total_timeout); + crm_info("Total timeout set to %d for peer's fencing targeting %s for %s" + CRM_XS "id=%.8s", + op->total_timeout, op->target, op->client_name, op->id); + } + + if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { + /* Ignore the caller's peer preference if topology is in use, because + * that peer might not have access to the required device. With + * topology, stonith_choose_peer() removes the device from further + * consideration, so the timeout must be calculated beforehand. + * + * @TODO Basing the total timeout on the caller's preferred peer (above) + * is less than ideal. + */ + peer = stonith_choose_peer(op); + + device = op->devices->data; + /* Fencing timeout sent to peer takes no delay into account. + * The peer will add a dedicated timer for any delay upon + * schedule_stonith_command(). + */ + timeout = get_device_timeout(op, peer, device, false); + } + + if (peer) { + /* Take any requested fencing delay into account to prevent it from eating + * up the timeout. + */ + int timeout_one = (op->delay > 0 ? 
+                           TIMEOUT_MULTIPLY_FACTOR * op->delay : 0);
+        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
+
+        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
+        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
+        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
+        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
+        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
+        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
+        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
+        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
+        crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
+
+        if (device) {
+            timeout_one += TIMEOUT_MULTIPLY_FACTOR *
+                           get_device_timeout(op, peer, device, true);
+            crm_notice("Requesting that %s perform '%s' action targeting %s "
+                       "using %s " CRM_XS " for client %s (%ds)",
+                       peer->host, op->action, op->target, device,
+                       op->client_name, timeout_one);
+            crm_xml_add(remote_op, F_STONITH_DEVICE, device);
+
+        } else {
+            timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
+            crm_notice("Requesting that %s perform '%s' action targeting %s "
+                       CRM_XS " for client %s (%ds, %lds)",
+                       peer->host, op->action, op->target, op->client_name,
+                       timeout_one, stonith_watchdog_timeout_ms);
+        }
+
+        op->state = st_exec;
+        if (op->op_timer_one) {
+            g_source_remove(op->op_timer_one);
+            op->op_timer_one = 0;
+        }
+
+        if (!((stonith_watchdog_timeout_ms > 0)
+              && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+                  || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
+                      && pcmk__is_fencing_action(op->action)))
+              && check_watchdog_fencing_and_wait(op))) {
+
+            /* Some thoughts about self-fencing cases reaching this point:
+               - The check in check_watchdog_fencing_and_wait shouldn't fail
+                 if STONITH_WATCHDOG_ID is chosen as the fencing device, since
+                 its presence implies watchdog-fencing is enabled anyway.
+               - If watchdog-fencing is disabled, either in general or for a
+                 specific target (detected in check_watchdog_fencing_and_wait),
+                 we can't expect a success answer for some other kind of
+                 self-fencing, but a timeout is fine if the node doesn't come
+                 back in between.
+               - Delicate is the case where watchdog-fencing is enabled for a
+                 node but the watchdog fencing device isn't explicitly chosen
+                 for self-fencing. Local pe-execution in sbd may detect the
+                 node as unclean and lead to timely self-fencing; otherwise
+                 the selection of stonith-watchdog-timeout is at least
+                 questionable.
+             */
+
+            /* Coming here, we're not waiting for the watchdog timeout, so
+             * engage the timer with the timeout evaluated above.
+             */
+            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
+        }
+
+        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
+        peer->tried = TRUE;
+        free_xml(remote_op);
+        return;
+
+    } else if (op->phase == st_phase_on) {
+        /* A remapped "on" cannot be executed, but the node was already
+         * turned off successfully, so ignore the error and continue.
+ */
+ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
+ "after successful 'off'", device, op->target);
+ advance_topology_device_in_level(op, device, NULL);
+ return;
+
+ } else if (op->owner == FALSE) {
+ crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
+ op->action, op->target, op->client_name);
+
+ } else if (op->query_timer == 0) {
+ /* We've exhausted all available peers */
+ crm_info("No remaining peers capable of fencing (%s) %s for client %s "
+ CRM_XS " state=%s", op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+ CRM_CHECK(op->state < st_done, return);
+ finalize_timed_out_op(op, "All nodes failed, or are unable, to "
+ "fence target");
+
+ } else if (op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
+ /* If the operation never left the query state but we have all the
+ * expected replies, then no devices are available to execute the
+ * fencing operation. */
+
+ if (stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
+ STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
+ if (check_watchdog_fencing_and_wait(op)) {
+ return;
+ }
+ }
+
+ if (op->state == st_query) {
+ crm_info("No peers (out of %d) have devices capable of fencing "
+ "(%s) %s for client %s " CRM_XS " state=%s",
+ op->replies, op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ } else {
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ }
+ /* ... else use the existing result from the previous failed attempt
+ * (topology is not in use, and no devices remain to be attempted).
+ * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
+ * prevent finalize_op() from setting the correct delegate if
+ * needed.
+ */
+
+ crm_info("No peers (out of %d) are capable of fencing (%s) %s "
+ "for client %s " CRM_XS " state=%s",
+ op->replies, op->action, op->target, op->client_name,
+ stonith_op_state_str(op->state));
+ }
+
+ op->state = st_failed;
+ finalize_op(op, NULL, false);
+
+ } else {
+ crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
+ "for client %s " CRM_XS " id=%.8s",
+ op->action, op->target, (device? " using " : ""),
+ (device? device : ""), op->client_name, op->id);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Comparison function for sorting query results
+ *
+ * \param[in] a GList item to compare
+ * \param[in] b GList item to compare
+ *
+ * \return Per the glib documentation, "a negative integer if the first value
+ * comes before the second, 0 if they are equal, or a positive integer
+ * if the first value comes after the second."
+ */
+static gint
+sort_peers(gconstpointer a, gconstpointer b)
+{
+ const peer_device_info_t *peer_a = a;
+ const peer_device_info_t *peer_b = b;
+
+ return (peer_b->ndevices - peer_a->ndevices);
+}
+
+/*!
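+ * (Illustrative example: with a two-level topology where level 1
+ * contains device disk1 and level 2 contains power1, this returns TRUE
+ * only if each of disk1 and power1 appears in at least one peer's query
+ * reply, not counting replies from the target itself when the target is
+ * being killed.)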
+ * \internal + * \brief Determine if all the devices in the topology are found or not + * + * \param[in] op Fencing operation with topology to check + */ +static gboolean +all_topology_devices_found(const remote_fencing_op_t *op) +{ + GList *device = NULL; + GList *iter = NULL; + device_properties_t *match = NULL; + stonith_topology_t *tp = NULL; + gboolean skip_target = FALSE; + int i; + + tp = find_topology_for_host(op->target); + if (!tp) { + return FALSE; + } + if (pcmk__is_fencing_action(op->action)) { + /* Don't count the devices on the target node if we are killing + * the target node. */ + skip_target = TRUE; + } + + for (i = 0; i < ST_LEVEL_MAX; i++) { + for (device = tp->levels[i]; device; device = device->next) { + match = NULL; + for (iter = op->query_results; iter && !match; iter = iter->next) { + peer_device_info_t *peer = iter->data; + + if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; + } + match = find_peer_device(op, peer, device->data, st_device_supports_none); + } + if (!match) { + return FALSE; + } + } + } + + return TRUE; +} + +/*! + * \internal + * \brief Parse action-specific device properties from XML + * + * \param[in] xml XML element containing the properties + * \param[in] peer Name of peer that sent XML (for logs) + * \param[in] device Device ID (for logs) + * \param[in] action Action the properties relate to (for logs) + * \param[in,out] op Fencing operation that properties are being parsed for + * \param[in] phase Phase the properties relate to + * \param[in,out] props Device properties to update + */ +static void +parse_action_specific(const xmlNode *xml, const char *peer, const char *device, + const char *action, remote_fencing_op_t *op, + enum st_remap_phase phase, device_properties_t *props) +{ + props->custom_action_timeout[phase] = 0; + crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, + &props->custom_action_timeout[phase]); + if (props->custom_action_timeout[phase]) { + crm_trace("Peer %s with device %s returned %s action timeout %d", + peer, device, action, props->custom_action_timeout[phase]); + } + + props->delay_max[phase] = 0; + crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); + if (props->delay_max[phase]) { + crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", + peer, device, props->delay_max[phase], action); + } + + props->delay_base[phase] = 0; + crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]); + if (props->delay_base[phase]) { + crm_trace("Peer %s with device %s returned base delay %d for %s", + peer, device, props->delay_base[phase], action); + } + + /* Handle devices with automatic unfencing */ + if (pcmk__str_eq(action, "on", pcmk__str_none)) { + int required = 0; + + crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); + if (required) { + crm_trace("Peer %s requires device %s to execute for action %s", + peer, device, action); + add_required_device(op, device); + } + } + + /* If a reboot is remapped to off+on, it's possible that a node is allowed + * to perform one action but not another. + */ + if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) { + props->disallowed[phase] = TRUE; + crm_trace("Peer %s is disallowed from executing %s for device %s", + peer, action, device); + } +} + +/*! 
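+ * An entry's shape, sketched for illustration only (element and
+ * attribute names below are placeholders for the F_STONITH_* constants
+ * referenced in the code, not their literal spellings):
+ *
+ * <device id="fence_ipmi1" verified="1" support_flags="...">
+ * <child id="off" .../> <child id="on" .../>
+ * </device>
+ *
+ * where the "off"/"on" children carry action-specific values in case a
+ * reboot winds up being remapped.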
+ * \internal
+ * \brief Parse one device's properties from peer's XML query reply
+ *
+ * \param[in] xml XML node containing device properties
+ * \param[in,out] op Operation that query and reply relate to
+ * \param[in,out] peer Peer's device information
+ * \param[in] device ID of device being parsed
+ */
+static void
+add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
+ peer_device_info_t *peer, const char *device)
+{
+ xmlNode *child;
+ int verified = 0;
+ device_properties_t *props = calloc(1, sizeof(device_properties_t));
+ int flags = st_device_supports_on; /* Old nodes that don't set the flag
+ are assumed to support the on
+ action */
+
+ /* Add a new entry to this peer's devices list */
+ CRM_ASSERT(props != NULL);
+ g_hash_table_insert(peer->devices, strdup(device), props);
+
+ /* Peers with verified (monitored) access will be preferred */
+ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
+ if (verified) {
+ crm_trace("Peer %s has confirmed a verified device %s",
+ peer->host, device);
+ props->verified = TRUE;
+ }
+
+ crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags);
+ props->device_support_flags = flags;
+
+ /* Parse action-specific device properties */
+ parse_action_specific(xml, peer->host, device, op_requested_action(op),
+ op, st_phase_requested, props);
+ for (child = pcmk__xml_first_child(xml); child != NULL;
+ child = pcmk__xml_next(child)) {
+ /* Replies for "reboot" operations will include the action-specific
+ * values for "off" and "on" in child elements, just in case the reboot
+ * winds up getting remapped.
+ */
+ if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) {
+ parse_action_specific(child, peer->host, device, "off",
+ op, st_phase_off, props);
+ } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) {
+ parse_action_specific(child, peer->host, device, "on",
+ op, st_phase_on, props);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Parse a peer's XML query reply and add it to operation's results
+ *
+ * \param[in,out] op Operation that query and reply relate to
+ * \param[in] host Name of peer that sent this reply
+ * \param[in] ndevices Number of devices expected in reply
+ * \param[in] xml XML node containing device list
+ *
+ * \return Newly allocated result structure with parsed reply
+ */
+static peer_device_info_t *
+add_result(remote_fencing_op_t *op, const char *host, int ndevices,
+ const xmlNode *xml)
+{
+ peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
+ xmlNode *child;
+
+ // cppcheck seems not to understand the abort logic in CRM_CHECK
+ // cppcheck-suppress memleak
+ CRM_CHECK(peer != NULL, return NULL);
+ peer->host = strdup(host);
+ peer->devices = pcmk__strkey_table(free, free);
+
+ /* Each child element describes one capable device available to the peer */
+ for (child = pcmk__xml_first_child(xml); child != NULL;
+ child = pcmk__xml_next(child)) {
+ const char *device = ID(child);
+
+ if (device) {
+ add_device_properties(child, op, peer, device);
+ }
+ }
+
+ peer->ndevices = g_hash_table_size(peer->devices);
+ CRM_CHECK(ndevices == peer->ndevices,
+ crm_err("Query claimed to have %d device%s but %d found",
+ ndevices, pcmk__plural_s(ndevices), peer->ndevices));
+
+ op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
+ return peer;
+}
+
+/*!
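+ * Note that replies are counted against the smaller of the operation's
+ * expected reply count and the number of currently active peers: for
+ * example, with 3 active peers and replies_expected = 5, the operation
+ * can proceed once 3 replies have arrived.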
+ * \internal + * \brief Handle a peer's reply to our fencing query + * + * Parse a query result from XML and store it in the remote operation + * table, and when enough replies have been received, issue a fencing request. + * + * \param[in] msg XML reply received + * + * \return pcmk_ok on success, -errno on error + * + * \note See initiate_remote_stonith_op() for how the XML query was initially + * formed, and stonith_query() for how the peer formed its XML reply. + */ +int +process_remote_stonith_query(xmlNode *msg) +{ + int ndevices = 0; + gboolean host_is_target = FALSE; + gboolean have_all_replies = FALSE; + const char *id = NULL; + const char *host = NULL; + remote_fencing_op_t *op = NULL; + peer_device_info_t *peer = NULL; + uint32_t replies_expected; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + + CRM_CHECK(dev != NULL, return -EPROTO); + + id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); + CRM_CHECK(id != NULL, return -EPROTO); + + dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); + CRM_CHECK(dev != NULL, return -EPROTO); + crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); + + op = g_hash_table_lookup(stonith_remote_op_list, id); + if (op == NULL) { + crm_debug("Received query reply for unknown or expired operation %s", + id); + return -EOPNOTSUPP; + } + + replies_expected = fencing_active_peers(); + if (op->replies_expected < replies_expected) { + replies_expected = op->replies_expected; + } + if ((++op->replies >= replies_expected) && (op->state == st_query)) { + have_all_replies = TRUE; + } + host = crm_element_value(msg, F_ORIG); + host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); + + crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", + op->replies, replies_expected, host, + op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); + if (ndevices > 0) { + peer = add_result(op, host, ndevices, dev); + } + + pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + if (pcmk_is_set(op->call_options, st_opt_topology)) { + /* If we start the fencing before all the topology results are in, + * it is possible fencing levels will be skipped because of the missing + * query results. */ + if (op->state == st_query && all_topology_devices_found(op)) { + /* All the query results are in for the topology, start the fencing ops. 
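+ * The peer that sent this final reply is passed along as the
+ * preferred peer, although with topology in use request_peer_fencing()
+ * will re-select the peer per device via stonith_choose_peer().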
+ */
+ crm_trace("All topology devices found");
+ request_peer_fencing(op, peer);
+
+ } else if (have_all_replies) {
+ crm_info("All topology query replies have arrived, continuing (%d expected/%d received)",
+ replies_expected, op->replies);
+ request_peer_fencing(op, NULL);
+ }
+
+ } else if (op->state == st_query) {
+ int nverified = count_peer_devices(op, peer, TRUE,
+ fenced_support_flag(op->action));
+
+ /* We have a result for a non-topology fencing op that looks promising,
+ * so go ahead and start fencing before the query timeout */
+ if ((peer != NULL) && !host_is_target && nverified) {
+ /* We have a verified device living on a peer that is not the target */
+ crm_trace("Found %d verified device%s",
+ nverified, pcmk__plural_s(nverified));
+ request_peer_fencing(op, peer);
+
+ } else if (have_all_replies) {
+ crm_info("All query replies have arrived, continuing (%d expected/%d received)",
+ replies_expected, op->replies);
+ request_peer_fencing(op, NULL);
+
+ } else {
+ crm_trace("Waiting for more peer results before launching fencing operation");
+ }
+
+ } else if ((peer != NULL) && (op->state == st_done)) {
+ crm_info("Discarding query result from %s (%d device%s): "
+ "Operation is %s", peer->host,
+ peer->ndevices, pcmk__plural_s(peer->ndevices),
+ stonith_op_state_str(op->state));
+ }
+
+ return pcmk_ok;
+}
+
+/*!
+ * \internal
+ * \brief Handle a peer's reply to a fencing request
+ *
+ * Parse a fencing reply from XML, and either finalize the operation
+ * or attempt another device as appropriate.
+ *
+ * \param[in] msg XML reply received
+ */
+void
+fenced_process_fencing_reply(xmlNode *msg)
+{
+ const char *id = NULL;
+ const char *device = NULL;
+ remote_fencing_op_t *op = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ CRM_CHECK(dev != NULL, return);
+
+ id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+ CRM_CHECK(id != NULL, return);
+
+ dev = stonith__find_xe_with_result(msg);
+ CRM_CHECK(dev != NULL, return);
+
+ stonith__xe_get_result(dev, &result);
+
+ device = crm_element_value(dev, F_STONITH_DEVICE);
+
+ if (stonith_remote_op_list) {
+ op = g_hash_table_lookup(stonith_remote_op_list, id);
+ }
+
+ if ((op == NULL) && pcmk__result_ok(&result)) {
+ /* Record successful fencing operations */
+ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
+
+ op = create_remote_stonith_op(client_id, dev, TRUE);
+ }
+
+ if (op == NULL) {
+ /* Could be for an event that began before we started */
+ /* TODO: Record the op for later querying */
+ crm_info("Received peer result of unknown or expired operation %s", id);
+ pcmk__reset_result(&result);
+ return;
+ }
+
+ pcmk__reset_result(&op->result);
+ op->result = result; // The operation takes ownership of the result
+
+ if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
+ crm_err("Received outdated reply for device %s (instead of %s) to "
+ "fence (%s) %s. Operation already timed out at peer level.",
+ device, (const char *) op->devices->data, op->action, op->target);
+ return;
+ }
+
+ if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+ if (pcmk__result_ok(&op->result)) {
+ op->state = st_done;
+ } else {
+ op->state = st_failed;
+ }
+ finalize_op(op, msg, false);
+ return;
+
+ } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+ /* If this isn't a remote-level broadcast, and we are not the
+ * originator of the operation, we should not be receiving this msg. */
+ crm_err("Received non-broadcast fencing result for operation %.8s "
+ "we do not own (device %s targeting %s)",
+ op->id, device, op->target);
+ return;
+ }
+
+ if (pcmk_is_set(op->call_options, st_opt_topology)) {
+ const char *device = NULL;
+ const char *reason = op->result.exit_reason;
+
+ /* We own the op, and it is complete. Broadcast the result to all nodes
+ * and notify our local clients. */
+ if (op->state == st_done) {
+ finalize_op(op, msg, false);
+ return;
+ }
+
+ device = crm_element_value(msg, F_STONITH_DEVICE);
+
+ if ((op->phase == st_phase_on) && !pcmk__result_ok(&op->result)) {
+ /* A remapped "on" failed, but the node was already turned off
+ * successfully, so ignore the error and continue.
+ */
+ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
+ "after successful 'off'",
+ device, pcmk_exec_status_str(op->result.execution_status),
+ (reason == NULL)? "" : ": ",
+ (reason == NULL)? "" : reason,
+ op->target);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ } else {
+ crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
+ "%s%s%s%s",
+ op->action, op->target,
+ ((device == NULL)? "" : " using "),
+ ((device == NULL)? "" : device),
+ op->client_name,
+ op->originator,
+ pcmk_exec_status_str(op->result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+ }
+
+ if (pcmk__result_ok(&op->result)) {
+ /* An operation completed successfully. Try another device if
+ * necessary; otherwise mark the operation as done. */
+ advance_topology_device_in_level(op, device, msg);
+ return;
+ } else {
+ /* This device failed, so it's time to try another topology level.
+ * If no other levels are available, mark this operation as failed
+ * and report the results. */
+ if (advance_topology_level(op, false) != pcmk_rc_ok) {
+ op->state = st_failed;
+ finalize_op(op, msg, false);
+ return;
+ }
+ }
+
+ } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
+ op->state = st_done;
+ finalize_op(op, msg, false);
+ return;
+
+ } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
+ && (op->devices == NULL)) {
+ /* If the operation timed out, don't bother retrying other peers.
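+ * (Presumably because a timed-out action has already consumed its
+ * share of the operation's total timeout, so repeating it through
+ * another peer would be unlikely to finish within the client's
+ * deadline.)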
+ */
+ op->state = st_failed;
+ finalize_op(op, msg, false);
+ return;
+
+ } else {
+ /* Fall through and attempt the fencing action using another peer */
+ }
+
+ /* Retry on failure */
+ crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+ op->target, op->originator, op->client_name,
+ pcmk_exec_status_str(op->result.execution_status));
+ request_peer_fencing(op, NULL);
+}
+
+gboolean
+stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
+{
+ GHashTableIter iter;
+ time_t now = time(NULL);
+ remote_fencing_op_t *rop = NULL;
+
+ if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
+ action == NULL) {
+ return FALSE;
+ }
+
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
+ if (strcmp(rop->target, target) != 0) {
+ continue;
+ } else if (rop->state != st_done) {
+ /* We don't have to worry about remapped reboots here,
+ * because if the state is done, any remapping has been undone
+ */
+ continue;
+ } else if (strcmp(rop->action, action) != 0) {
+ continue;
+ } else if ((rop->completed + tolerance) < now) {
+ continue;
+ }
+
+ crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
+ target, action, tolerance, rop->delegate, rop->originator);
+ return TRUE;
+ }
+ return FALSE;
+}
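+
+/* Illustrative usage sketch (not part of the original file): a request
+ * handler could consult the tolerance window before scheduling a new
+ * operation, treating a recent equivalent fencing action as success:
+ *
+ * if (stonith_check_fence_tolerance(tolerance, target, action)) {
+ * // Already fenced recently enough; report success to the client
+ * return pcmk_ok;
+ * }
+ */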